From 8222d5910dae08213b6d9d4bc9a7f8502855e624 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Feb 2023 09:09:52 +0800 Subject: xfrm: Zero padding when dumping algos and encap When copying data to user-space we should ensure that only valid data is copied over. Padding in structures may be filled with random (possibly sensitve) data and should never be given directly to user-space. This patch fixes the copying of xfrm algorithms and the encap template in xfrm_user so that padding is zeroed. Reported-by: syzbot+fa5414772d5c445dac3c@syzkaller.appspotmail.com Reported-by: Hyunwoo Kim Signed-off-by: Herbert Xu Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cf5172d4ce68..103af2b3e986 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1012,7 +1012,9 @@ static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb) return -EMSGSIZE; ap = nla_data(nla); - memcpy(ap, aead, sizeof(*aead)); + strscpy_pad(ap->alg_name, aead->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = aead->alg_key_len; + ap->alg_icv_len = aead->alg_icv_len; if (redact_secret && aead->alg_key_len) memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8); @@ -1032,7 +1034,8 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) return -EMSGSIZE; ap = nla_data(nla); - memcpy(ap, ealg, sizeof(*ealg)); + strscpy_pad(ap->alg_name, ealg->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = ealg->alg_key_len; if (redact_secret && ealg->alg_key_len) memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8); @@ -1043,6 +1046,40 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) return 0; } +static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb) +{ + struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg)); + struct xfrm_algo *ap; + + if (!nla) + return -EMSGSIZE; + + ap = nla_data(nla); + strscpy_pad(ap->alg_name, calg->alg_name, sizeof(ap->alg_name)); + ap->alg_key_len = 0; + + return 0; +} + +static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb) +{ + struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep)); + struct xfrm_encap_tmpl *uep; + + if (!nla) + return -EMSGSIZE; + + uep = nla_data(nla); + memset(uep, 0, sizeof(*uep)); + + uep->encap_type = ep->encap_type; + uep->encap_sport = ep->encap_sport; + uep->encap_dport = ep->encap_dport; + uep->encap_oa = ep->encap_oa; + + return 0; +} + static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m) { int ret = 0; @@ -1098,12 +1135,12 @@ static int copy_to_user_state_extra(struct xfrm_state *x, goto out; } if (x->calg) { - ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + ret = copy_to_user_calg(x->calg, skb); if (ret) goto out; } if (x->encap) { - ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + ret = copy_to_user_encap(x->encap, skb); if (ret) goto out; } -- cgit From 6715df8d5d24655b9fd368e904028112b54c7de1 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sun, 19 Feb 2023 22:04:26 +0200 Subject: bpf: Allow reads from uninit stack This commits updates the following functions to allow reads from uninitialized stack locations when env->allow_uninit_stack option is enabled: - check_stack_read_fixed_off() - check_stack_range_initialized(), called from: - check_stack_read_var_off() - check_helper_mem_access() Such change allows to relax logic in stacksafe() to treat STACK_MISC and STACK_INVALID in a same way and make the following stack slot configurations equivalent: | Cached state | Current state | | stack slot | stack slot | |------------------+------------------| | STACK_INVALID or | STACK_INVALID or | | STACK_MISC | STACK_SPILL or | | | STACK_MISC or | | | STACK_ZERO or | | | STACK_DYNPTR | This leads to significant verification speed gains (see below). The idea was suggested by Andrii Nakryiko [1] and initial patch was created by Alexei Starovoitov [2]. Currently the env->allow_uninit_stack is allowed for programs loaded by users with CAP_PERFMON or CAP_SYS_ADMIN capabilities. A number of test cases from verifier/*.c were expecting uninitialized stack access to be an error. These test cases were updated to execute in unprivileged mode (thus preserving the tests). The test progs/test_global_func10.c expected "invalid indirect read from stack" error message because of the access to uninitialized memory region. This error is no longer possible in privileged mode. The test is updated to provoke an error "invalid indirect access to stack" because of access to invalid stack address (such error is not verified by progs/test_global_func*.c series of tests). The following tests had to be removed because these can't be made unprivileged: - verifier/sock.c: - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value" BPF_PROG_TYPE_SCHED_CLS programs are not executed in unprivileged mode. - verifier/var_off.c: - "indirect variable-offset stack access, max_off+size > max_initialized" - "indirect variable-offset stack access, uninitialized" These tests verify that access to uninitialized stack values is detected when stack offset is not a constant. However, variable stack access is prohibited in unprivileged mode, thus these tests are no longer valid. * * * Here is veristat log comparing this patch with current master on a set of selftest binaries listed in tools/testing/selftests/bpf/veristat.cfg and cilium BPF binaries (see [3]): $ ./veristat -e file,prog,states -C -f 'states_pct<-30' master.log current.log File Program States (A) States (B) States (DIFF) -------------------------- -------------------------- ---------- ---------- ---------------- bpf_host.o tail_handle_ipv6_from_host 349 244 -105 (-30.09%) bpf_host.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_lxc.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_sock.o cil_sock4_connect 70 48 -22 (-31.43%) bpf_sock.o cil_sock4_sendmsg 68 46 -22 (-32.35%) bpf_xdp.o tail_handle_nat_fwd_ipv4 1554 803 -751 (-48.33%) bpf_xdp.o tail_lb_ipv4 6457 2473 -3984 (-61.70%) bpf_xdp.o tail_lb_ipv6 7249 3908 -3341 (-46.09%) pyperf600_bpf_loop.bpf.o on_event 287 145 -142 (-49.48%) strobemeta.bpf.o on_event 15915 4772 -11143 (-70.02%) strobemeta_nounroll2.bpf.o on_event 17087 3820 -13267 (-77.64%) xdp_synproxy_kern.bpf.o syncookie_tc 21271 6635 -14636 (-68.81%) xdp_synproxy_kern.bpf.o syncookie_xdp 23122 6024 -17098 (-73.95%) -------------------------- -------------------------- ---------- ---------- ---------------- Note: I limited selection by states_pct<-30%. Inspection of differences in pyperf600_bpf_loop behavior shows that the following patch for the test removes almost all differences: - a/tools/testing/selftests/bpf/progs/pyperf.h + b/tools/testing/selftests/bpf/progs/pyperf.h @ -266,8 +266,8 @ int __on_event(struct bpf_raw_tracepoint_args *ctx) } if (event->pthread_match || !pidData->use_tls) { - void* frame_ptr; - FrameData frame; + void* frame_ptr = 0; + FrameData frame = {}; Symbol sym = {}; int cur_cpu = bpf_get_smp_processor_id(); W/o this patch the difference comes from the following pattern (for different variables): static bool get_frame_data(... FrameData *frame ...) { ... bpf_probe_read_user(&frame->f_code, ...); if (!frame->f_code) return false; ... bpf_probe_read_user(&frame->co_name, ...); if (frame->co_name) ...; } int __on_event(struct bpf_raw_tracepoint_args *ctx) { FrameData frame; ... get_frame_data(... &frame ...) // indirectly via a bpf_loop & callback ... } SEC("raw_tracepoint/kfree_skb") int on_event(struct bpf_raw_tracepoint_args* ctx) { ... ret |= __on_event(ctx); ret |= __on_event(ctx); ... } With regards to value `frame->co_name` the following is important: - Because of the conditional `if (!frame->f_code)` each call to __on_event() produces two states, one with `frame->co_name` marked as STACK_MISC, another with it as is (and marked STACK_INVALID on a first call). - The call to bpf_probe_read_user() does not mark stack slots corresponding to `&frame->co_name` as REG_LIVE_WRITTEN but it marks these slots as BPF_MISC, this happens because of the following loop in the check_helper_call(): for (i = 0; i < meta.access_size; i++) { err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1, false); if (err) return err; } Note the size of the write, it is a one byte write for each byte touched by a helper. The BPF_B write does not lead to write marks for the target stack slot. - Which means that w/o this patch when second __on_event() call is verified `if (frame->co_name)` will propagate read marks first to a stack slot with STACK_MISC marks and second to a stack slot with STACK_INVALID marks and these states would be considered different. [1] https://lore.kernel.org/bpf/CAEf4BzY3e+ZuC6HUa8dCiUovQRg2SzEk7M-dSkqNZyn=xEmnPA@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CAADnVQKs2i1iuZ5SUGuJtxWVfGYR9kDgYKhq3rNV+kBLQCu7rA@mail.gmail.com/ [3] git@github.com:anakryiko/cilium.git Suggested-by: Andrii Nakryiko Co-developed-by: Alexei Starovoitov Signed-off-by: Eduard Zingerman Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230219200427.606541-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 11 ++- .../selftests/bpf/progs/test_global_func10.c | 8 +- tools/testing/selftests/bpf/verifier/calls.c | 13 ++- .../selftests/bpf/verifier/helper_access_var_len.c | 104 ++++++++++++++------- tools/testing/selftests/bpf/verifier/int_ptr.c | 9 +- .../selftests/bpf/verifier/search_pruning.c | 13 ++- tools/testing/selftests/bpf/verifier/sock.c | 27 ------ tools/testing/selftests/bpf/verifier/spill_fill.c | 7 +- tools/testing/selftests/bpf/verifier/var_off.c | 52 ----------- 9 files changed, 108 insertions(+), 136 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 272563a0b770..d517d13878cf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3826,6 +3826,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_MISC) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -3863,6 +3865,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_ZERO) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -5754,7 +5758,8 @@ static int check_stack_range_initialized( stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; if (*stype == STACK_MISC) goto mark; - if (*stype == STACK_ZERO) { + if ((*stype == STACK_ZERO) || + (*stype == STACK_INVALID && env->allow_uninit_stack)) { if (clobber) { /* helper can write anything into the stack */ *stype = STACK_MISC; @@ -13936,6 +13941,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; + if (env->allow_uninit_stack && + old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC) + continue; + /* explored stack has more populated slots than current stack * and these slots were used */ diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c index 98327bdbbfd2..8fba3f3649e2 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func10.c +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -5,12 +5,12 @@ #include "bpf_misc.h" struct Small { - int x; + long x; }; struct Big { - int x; - int y; + long x; + long y; }; __noinline int foo(const struct Big *big) @@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big) } SEC("cgroup_skb/ingress") -__failure __msg("invalid indirect read from stack") +__failure __msg("invalid indirect access to stack") int global_func10(struct __sk_buff *skb) { const struct Small small = {.x = skb->len }; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 9d993926bf0e..289ed202ec66 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2221,19 +2221,22 @@ * that fp-8 stack slot was unused in the fall-through * branch and will accept the program incorrectly */ - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_48b = { 6 }, - .errstr = "invalid indirect read from stack R2 off -8+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_hash_48b = { 7 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "calls: ctx read at start of subprog", diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index a6c869a7319c..9c4885885aba 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -29,19 +29,30 @@ { "helper access to variable memory: stack, bitwise AND, zero included", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use bitwise AND to limit r3 range to [0, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, bitwise AND + JMP, wrong max", @@ -183,20 +194,31 @@ { "helper access to variable memory: stack, JMP, no min check", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use JMP to limit r3 range to [0, 64] */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, JMP (signed), no min check", @@ -564,29 +586,41 @@ { "helper access to variable memory: 8 bytes leak", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + /* Note: fp[-32] left uninitialized */ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + /* Limit r3 range to [1, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory region [1, 64] + * at &fp[-64] is not fully initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+32 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 3 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: 8 bytes no leak (init memory)", diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c index 070893fb2900..02d9e004260b 100644 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -54,12 +54,13 @@ /* bpf_strtoul() */ BPF_EMIT_CALL(BPF_FUNC_strtoul), - BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack R4 off -16+4 size 8", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8", + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "ARG_PTR_TO_LONG misaligned", diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index d63fd8991b03..745d6b5842fd 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -128,9 +128,10 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "invalid read from stack off -16+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr_unpriv = "invalid read from stack off -16+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "precision tracking for u32 spill/fill", @@ -258,6 +259,8 @@ BPF_EXIT_INSN(), }, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "invalid read from stack off -8+1 size 8", - .result = REJECT, + .errstr_unpriv = "invalid read from stack off -8+1 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index d11d0b28be41..108dd3ee1edd 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -530,33 +530,6 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, -{ - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_sk_storage_map = { 14 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "invalid indirect read from stack", -}, { "bpf_map_lookup_elem(smap, &key)", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 9bb302dade23..d1463bf4949a 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -171,9 +171,10 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "invalid read from stack off -4+0 size 4", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid read from stack off -4+0 size 4", + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "Spill a u32 const scalar. Refill as u16. Offset to skb->data", diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index d37f512fad16..b183e26c03f1 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -212,31 +212,6 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, -{ - "indirect variable-offset stack access, max_off+size > max_initialized", - .insns = { - /* Fill only the second from top 8 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-12 or fp-16, but we don't know - * which. fp-12 size 8 is partially uninitialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it indirectly. */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack R2 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, { "indirect variable-offset stack access, min_off < min_initialized", .insns = { @@ -289,33 +264,6 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, -{ - "indirect variable-offset stack access, uninitialized", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_3, 28), - /* Fill the top 16 bytes of the stack. */ - BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16), - /* Add it to fp. We now have either fp-12 or fp-16, we don't know - * which, but either way it points to initialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_5, 8), - /* Dereference it indirectly. */ - BPF_EMIT_CALL(BPF_FUNC_getsockopt), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect read from stack R4 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SOCK_OPS, -}, { "indirect variable-offset stack access, ok", .insns = { -- cgit From 6338a94d5ab42a94e96ea36edc5f7df1fe73e68e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sun, 19 Feb 2023 22:04:27 +0200 Subject: selftests/bpf: Tests for uninitialized stack reads Three testcases to make sure that stack reads from uninitialized locations are accepted by verifier when executed in privileged mode: - read from a fixed offset; - read from a variable offset; - passing a pointer to stack to a helper converts STACK_INVALID to STACK_MISC. Signed-off-by: Eduard Zingerman Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230219200427.606541-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/uninit_stack.c | 9 +++ tools/testing/selftests/bpf/progs/uninit_stack.c | 87 ++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/uninit_stack.c create mode 100644 tools/testing/selftests/bpf/progs/uninit_stack.c diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c new file mode 100644 index 000000000000..e64c71948491 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "uninit_stack.skel.h" + +void test_uninit_stack(void) +{ + RUN_TESTS(uninit_stack); +} diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c new file mode 100644 index 000000000000..8a403470e557 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uninit_stack.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +/* Read an uninitialized value from stack at a fixed offset */ +SEC("socket") +__naked int read_uninit_stack_fixed_off(void *ctx) +{ + asm volatile (" \ + r0 = 0; \ + /* force stack depth to be 128 */ \ + *(u64*)(r10 - 128) = r1; \ + r1 = *(u8 *)(r10 - 8 ); \ + r0 += r1; \ + r1 = *(u8 *)(r10 - 11); \ + r1 = *(u8 *)(r10 - 13); \ + r1 = *(u8 *)(r10 - 15); \ + r1 = *(u16*)(r10 - 16); \ + r1 = *(u32*)(r10 - 32); \ + r1 = *(u64*)(r10 - 64); \ + /* read from a spill of a wrong size, it is a separate \ + * branch in check_stack_read_fixed_off() \ + */ \ + *(u32*)(r10 - 72) = r1; \ + r1 = *(u64*)(r10 - 72); \ + r0 = 0; \ + exit; \ +" + ::: __clobber_all); +} + +/* Read an uninitialized value from stack at a variable offset */ +SEC("socket") +__naked int read_uninit_stack_var_off(void *ctx) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + /* force stack depth to be 64 */ \ + *(u64*)(r10 - 64) = r0; \ + r0 = -r0; \ + /* give r0 a range [-31, -1] */ \ + if r0 s<= -32 goto exit_%=; \ + if r0 s>= 0 goto exit_%=; \ + /* access stack using r0 */ \ + r1 = r10; \ + r1 += r0; \ + r2 = *(u8*)(r1 + 0); \ +exit_%=: r0 = 0; \ + exit; \ +" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +static __noinline void dummy(void) {} + +/* Pass a pointer to uninitialized stack memory to a helper. + * Passed memory block should be marked as STACK_MISC after helper call. + */ +SEC("socket") +__log_level(7) __msg("fp-104=mmmmmmmm") +__naked int helper_uninit_to_misc(void *ctx) +{ + asm volatile (" \ + /* force stack depth to be 128 */ \ + *(u64*)(r10 - 128) = r1; \ + r1 = r10; \ + r1 += -128; \ + r2 = 32; \ + call %[bpf_trace_printk]; \ + /* Call to dummy() forces print_verifier_state(..., true), \ + * thus showing the stack state, matched by __msg(). \ + */ \ + call %[dummy]; \ + r0 = 0; \ + exit; \ +" + : + : __imm(bpf_trace_printk), + __imm(dummy) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; -- cgit From 5f6e839ebc951c50f1ca06791d016c256f0285a9 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 22 Feb 2023 09:54:42 -0800 Subject: xen: update arch/x86/include/asm/xen/cpuid.h Update arch/x86/include/asm/xen/cpuid.h from the Xen tree to get newest definitions. This picks up some TSC mode definitions and comment formatting changes. Signed-off-by: Krister Johansen Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/94b9046dd0db3794f0633d134b7108508957758d.1677038165.git.kjlx@templeofstupid.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/cpuid.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index 6daa9b0c8d11..a3c29b1496c8 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -89,11 +89,21 @@ * Sub-leaf 2: EAX: host tsc frequency in kHz */ +#define XEN_CPUID_TSC_EMULATED (1u << 0) +#define XEN_CPUID_HOST_TSC_RELIABLE (1u << 1) +#define XEN_CPUID_RDTSCP_INSTR_AVAIL (1u << 2) + +#define XEN_CPUID_TSC_MODE_DEFAULT (0) +#define XEN_CPUID_TSC_MODE_ALWAYS_EMULATE (1u) +#define XEN_CPUID_TSC_MODE_NEVER_EMULATE (2u) +#define XEN_CPUID_TSC_MODE_PVRDTSCP (3u) + /* * Leaf 5 (0x40000x04) * HVM-specific features * Sub-leaf 0: EAX: Features * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) + * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag) */ #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */ #define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Virtualized x2APIC accesses */ @@ -102,12 +112,16 @@ #define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ #define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */ /* - * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be - * used to store high bits for the Destination ID. This expands the Destination - * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768. + * With interrupt format set to 0 (non-remappable) bits 55:49 from the + * IO-APIC RTE and bits 11:5 from the MSI address can be used to store + * high bits for the Destination ID. This expands the Destination ID + * field from 8 to 15 bits, allowing to target APIC IDs up 32768. */ #define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5) -/* Per-vCPU event channel upcalls */ +/* + * Per-vCPU event channel upcalls work correctly with physical IRQs + * bound to event channels. + */ #define XEN_HVM_CPUID_UPCALL_VECTOR (1u << 6) /* -- cgit From 99a7bcafbd0d04555074554573019096a8c10450 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 22 Feb 2023 09:54:56 -0800 Subject: x86/xen/time: cleanup xen_tsc_safe_clocksource Modifies xen_tsc_safe_clocksource() to use newly defined constants from arch/x86/include/asm/xen/cpuid.h. This replaces a numeric value with XEN_CPUID_TSC_MODE_NEVER_EMULATE, and deletes a comment that is now self explanatory. There should be no change in the function's behavior. Signed-off-by: Krister Johansen Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/a69ca370fecf85d312d2db633d9438ace2af6e5b.1677038165.git.kjlx@templeofstupid.com Signed-off-by: Juergen Gross --- arch/x86/xen/time.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 95140609c8a8..94056013a2a4 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -495,11 +496,7 @@ static int __init xen_tsc_safe_clocksource(void) /* Leaf 4, sub-leaf 0 (0x40000x03) */ cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx); - /* tsc_mode = no_emulate (2) */ - if (ebx != 2) - return 0; - - return 1; + return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE; } static void __init xen_time_init(void) -- cgit From c276a706ea1f51cf9723ed8484feceaf961b8f89 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Feb 2023 13:54:00 +0800 Subject: xfrm: Allow transport-mode states with AF_UNSPEC selector xfrm state selectors are matched against the inner-most flow which can be of any address family. Therefore middle states in nested configurations need to carry a wildcard selector in order to work at all. However, this is currently forbidden for transport-mode states. Fix this by removing the unnecessary check. Fixes: 13996378e658 ("[IPSEC]: Rename mode to outer_mode and add inner_mode") Reported-by: David George Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 00afe831c71c..f238048bf786 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2815,11 +2815,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, goto error; } - if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { - NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector"); - goto error; - } - x->inner_mode = *inner_mode; if (x->props.family == AF_INET) -- cgit From a54bace095d00e9222161495649688bc43de4dde Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Fri, 24 Feb 2023 17:34:50 +0200 Subject: drm/virtio: Pass correct device to dma_sync_sgtable_for_device() The "vdev->dev.parent" should be used instead of "vdev->dev" as a device for which to perform the DMA operation in both virtio_gpu_cmd_transfer_to_host_2d(3d). Because the virtio-gpu device "vdev->dev" doesn't really have DMA OPS assigned to it, but parent (virtio-pci or virtio-mmio) device "vdev->dev.parent" has. The more, the sgtable in question the code is trying to sync here was mapped for the parent device (by using its DMA OPS) previously at: virtio_gpu_object_shmem_init()->drm_gem_shmem_get_pages_sgt()-> dma_map_sgtable(), so should be synced here for the same parent device. Fixes: b5c9ed70d1a9 ("drm/virtio: Improve DMA API usage for shmem BOs") Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Dmitry Osipenko Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20230224153450.526222-1-olekstysh@gmail.com --- drivers/gpu/drm/virtio/virtgpu_vq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 9ff8660b50ad..208e9434cb28 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -597,7 +597,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); if (virtio_gpu_is_shmem(bo) && use_dma_api) - dma_sync_sgtable_for_device(&vgdev->vdev->dev, + dma_sync_sgtable_for_device(vgdev->vdev->dev.parent, bo->base.sgt, DMA_TO_DEVICE); cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); @@ -1019,7 +1019,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); if (virtio_gpu_is_shmem(bo) && use_dma_api) - dma_sync_sgtable_for_device(&vgdev->vdev->dev, + dma_sync_sgtable_for_device(vgdev->vdev->dev.parent, bo->base.sgt, DMA_TO_DEVICE); cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); -- cgit From ee9adb7a45516cfa536ca92253d7ae59d56db9e4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Jan 2023 00:13:11 +0300 Subject: drm/shmem-helper: Remove another errant put in error path drm_gem_shmem_mmap() doesn't own reference in error code path, resulting in the dma-buf shmem GEM object getting prematurely freed leading to a later use-after-free. Fixes: f49a51bfdc8e ("drm/shme-helpers: Fix dma_buf_mmap forwarding bug") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Reviewed-by: Rob Clark Link: https://patchwork.freedesktop.org/patch/msgid/20230108211311.3950107-1-dmitry.osipenko@collabora.com --- drivers/gpu/drm/drm_gem_shmem_helper.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index b602cd72a120..1a681de9a6ea 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -622,11 +622,14 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct int ret; if (obj->import_attach) { - /* Drop the reference drm_gem_mmap_obj() acquired.*/ - drm_gem_object_put(obj); vma->vm_private_data = NULL; + ret = dma_buf_mmap(obj->dma_buf, vma, 0); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + if (!ret) + drm_gem_object_put(obj); - return dma_buf_mmap(obj->dma_buf, vma, 0); + return ret; } ret = drm_gem_shmem_get_pages(shmem); -- cgit From 9630b585b607bd26f505d34620b14d75b9a5af7d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 4 Nov 2022 19:04:59 +0300 Subject: drm/msm/gem: Prevent blocking within shrinker loop Consider this scenario: 1. APP1 continuously creates lots of small GEMs 2. APP2 triggers `drop_caches` 3. Shrinker starts to evict APP1 GEMs, while APP1 produces new purgeable GEMs 4. msm_gem_shrinker_scan() returns non-zero number of freed pages and causes shrinker to try shrink more 5. msm_gem_shrinker_scan() returns non-zero number of freed pages again, goto 4 6. The APP2 is blocked in `drop_caches` until APP1 stops producing purgeable GEMs To prevent this blocking scenario, check number of remaining pages that GPU shrinker couldn't release due to a GEM locking contention or shrinking rejection. If there are no remaining pages left to shrink, then there is no need to free up more pages and shrinker may break out from the loop. This problem was found during shrinker/madvise IOCTL testing of virtio-gpu driver. The MSM driver is affected in the same way. Reviewed-by: Rob Clark Reviewed-by: Thomas Zimmermann Fixes: b352ba54a820 ("drm/msm/gem: Convert to using drm_gem_lru") Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/all/20230108210445.3948344-2-dmitry.osipenko@collabora.com/ --- drivers/gpu/drm/drm_gem.c | 9 +++++++-- drivers/gpu/drm/msm/msm_gem_shrinker.c | 11 +++++++++-- include/drm/drm_gem.h | 4 +++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index b8db675e7fb5..b0246a848006 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1375,10 +1375,13 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail); * * @lru: The LRU to scan * @nr_to_scan: The number of pages to try to reclaim + * @remaining: The number of pages left to reclaim, should be initialized by caller * @shrink: Callback to try to shrink/reclaim the object. */ unsigned long -drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan, +drm_gem_lru_scan(struct drm_gem_lru *lru, + unsigned int nr_to_scan, + unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj)) { struct drm_gem_lru still_in_lru; @@ -1417,8 +1420,10 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan, * hit shrinker in response to trying to get backing pages * for this obj (ie. while it's lock is already held) */ - if (!dma_resv_trylock(obj->resv)) + if (!dma_resv_trylock(obj->resv)) { + *remaining += obj->size >> PAGE_SHIFT; goto tail; + } if (shrink(obj)) { freed += obj->size >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 051bdbc093cf..f38296ad8743 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -107,6 +107,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) bool (*shrink)(struct drm_gem_object *obj); bool cond; unsigned long freed; + unsigned long remaining; } stages[] = { /* Stages of progressively more aggressive/expensive reclaim: */ { &priv->lru.dontneed, purge, true }, @@ -116,14 +117,18 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) }; long nr = sc->nr_to_scan; unsigned long freed = 0; + unsigned long remaining = 0; for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) { if (!stages[i].cond) continue; stages[i].freed = - drm_gem_lru_scan(stages[i].lru, nr, stages[i].shrink); + drm_gem_lru_scan(stages[i].lru, nr, + &stages[i].remaining, + stages[i].shrink); nr -= stages[i].freed; freed += stages[i].freed; + remaining += stages[i].remaining; } if (freed) { @@ -132,7 +137,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) stages[3].freed); } - return (freed > 0) ? freed : SHRINK_STOP; + return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP; } #ifdef CONFIG_DEBUG_FS @@ -182,10 +187,12 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) NULL, }; unsigned idx, unmapped = 0; + unsigned long remaining = 0; for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) { unmapped += drm_gem_lru_scan(lrus[idx], vmap_shrink_limit - unmapped, + &remaining, vmap_shrink); } diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index a17c2f903f81..b46ade812443 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -475,7 +475,9 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock); void drm_gem_lru_remove(struct drm_gem_object *obj); void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj); -unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan, +unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, + unsigned int nr_to_scan, + unsigned long *remaining, bool (*shrink)(struct drm_gem_object *obj)); #endif /* __DRM_GEM_H__ */ -- cgit From ba3be66f11c3c49afaa9f49b99e21d88756229ef Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 17 Nov 2022 04:40:38 +0300 Subject: drm/panfrost: Don't sync rpm suspension after mmu flushing Lockdep warns about potential circular locking dependency of devfreq with the fs_reclaim caused by immediate device suspension when mapping is released by shrinker. Fix it by doing the suspension asynchronously. Reviewed-by: Steven Price Fixes: ec7eba47da86 ("drm/panfrost: Rework page table flushing and runtime PM interaction") Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/all/20230108210445.3948344-3-dmitry.osipenko@collabora.com/ --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 4e83a1891f3e..666a5e53fe19 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -282,7 +282,7 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, if (pm_runtime_active(pfdev->dev)) mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT); - pm_runtime_put_sync_autosuspend(pfdev->dev); + pm_runtime_put_autosuspend(pfdev->dev); } static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, -- cgit From 77bc762451c2dc72bdbea07b857c916c9e7f4952 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Feb 2023 13:33:30 +0300 Subject: fbdev: chipsfb: Fix error codes in chipsfb_pci_init() The error codes are not set on these error paths. Fixes: 145eed48de27 ("fbdev: Remove conflicting devices on PCI bus") Signed-off-by: Dan Carpenter Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/Y/yG+sm2mhdJeTZW@kili --- drivers/video/fbdev/chipsfb.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index cc37ec3f8fc1..7799d52a651f 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -358,16 +358,21 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) if (rc) return rc; - if (pci_enable_device(dp) < 0) { + rc = pci_enable_device(dp); + if (rc < 0) { dev_err(&dp->dev, "Cannot enable PCI device\n"); goto err_out; } - if ((dp->resource[0].flags & IORESOURCE_MEM) == 0) + if ((dp->resource[0].flags & IORESOURCE_MEM) == 0) { + rc = -ENODEV; goto err_disable; + } addr = pci_resource_start(dp, 0); - if (addr == 0) + if (addr == 0) { + rc = -ENODEV; goto err_disable; + } p = framebuffer_alloc(0, &dp->dev); if (p == NULL) { @@ -417,7 +422,8 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) init_chips(p, addr); - if (register_framebuffer(p) < 0) { + rc = register_framebuffer(p); + if (rc < 0) { dev_err(&dp->dev,"C&T 65550 framebuffer failed to register\n"); goto err_unmap; } -- cgit From f8502fba45bd30e1a6a354d9d898bc99d1a11e6d Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Tue, 28 Feb 2023 15:11:20 +0530 Subject: tee: amdtee: fix race condition in amdtee_open_session There is a potential race condition in amdtee_open_session that may lead to use-after-free. For instance, in amdtee_open_session() after sess->sess_mask is set, and before setting: sess->session_info[i] = session_info; if amdtee_close_session() closes this same session, then 'sess' data structure will be released, causing kernel panic when 'sess' is accessed within amdtee_open_session(). The solution is to set the bit sess->sess_mask as the last step in amdtee_open_session(). Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Cc: stable@vger.kernel.org Signed-off-by: Rijo Thomas Acked-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/core.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 297dc62bca29..372d64756ed6 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -267,35 +267,34 @@ int amdtee_open_session(struct tee_context *ctx, goto out; } + /* Open session with loaded TA */ + handle_open_session(arg, &session_info, param); + if (arg->ret != TEEC_SUCCESS) { + pr_err("open_session failed %d\n", arg->ret); + handle_unload_ta(ta_handle); + kref_put(&sess->refcount, destroy_session); + goto out; + } + /* Find an empty session index for the given TA */ spin_lock(&sess->lock); i = find_first_zero_bit(sess->sess_mask, TEE_NUM_SESSIONS); - if (i < TEE_NUM_SESSIONS) + if (i < TEE_NUM_SESSIONS) { + sess->session_info[i] = session_info; + set_session_id(ta_handle, i, &arg->session); set_bit(i, sess->sess_mask); + } spin_unlock(&sess->lock); if (i >= TEE_NUM_SESSIONS) { pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); + handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); rc = -ENOMEM; goto out; } - /* Open session with loaded TA */ - handle_open_session(arg, &session_info, param); - if (arg->ret != TEEC_SUCCESS) { - pr_err("open_session failed %d\n", arg->ret); - spin_lock(&sess->lock); - clear_bit(i, sess->sess_mask); - spin_unlock(&sess->lock); - handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); - goto out; - } - - sess->session_info[i] = session_info; - set_session_id(ta_handle, i, &arg->session); out: free_pages((u64)ta, get_order(ta_size)); return rc; -- cgit From 4bb54c2ce48ffb3a06133ac0fb4086f7b48d9109 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 14 Feb 2023 15:05:49 +0100 Subject: arm64: tegra: Bump CBB ranges property on Tegra194 and Tegra234 Both Xavier (Tegra194) and Orin (Tegra234) support a 40-bit address map, so bump the CBB ranges property to cover all of the 1 TiB address space. This fixes an issue where some of the PCIe regions could not be remapped because of they were outside the memory specified by the CBB's ranges property. Reported-by: Jonathan Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 2 +- arch/arm64/boot/dts/nvidia/tegra234.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 133dbe5b429d..7096b999b33f 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -22,7 +22,7 @@ #address-cells = <2>; #size-cells = <2>; - ranges = <0x0 0x0 0x0 0x0 0x0 0x40000000>; + ranges = <0x0 0x0 0x0 0x0 0x100 0x0>; apbmisc: misc@100000 { compatible = "nvidia,tegra194-misc"; diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi index 8fe8eda7654d..f1748cff8a33 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi @@ -20,7 +20,7 @@ #address-cells = <2>; #size-cells = <2>; - ranges = <0x0 0x0 0x0 0x0 0x0 0x40000000>; + ranges = <0x0 0x0 0x0 0x0 0x100 0x0>; misc@100000 { compatible = "nvidia,tegra234-misc"; -- cgit From 4d8457fe0eb9c80ff7795cf8a30962128b71d853 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 2 Mar 2023 08:47:04 +0100 Subject: drm/edid: fix info leak when failing to get panel id Make sure to clear the transfer buffer before fetching the EDID to avoid leaking slab data to the logs on errors that leave the buffer unchanged. Fixes: 69c7717c20cc ("drm/edid: Dump the EDID when drm_edid_get_panel_id() has an error") Cc: stable@vger.kernel.org # 6.2 Cc: Douglas Anderson Signed-off-by: Johan Hovold Reviewed-by: Jani Nikula Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230302074704.11371-1-johan+linaro@kernel.org --- drivers/gpu/drm/drm_edid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 3841aba17abd..8707fe72a028 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2797,7 +2797,7 @@ u32 drm_edid_get_panel_id(struct i2c_adapter *adapter) * the EDID then we'll just return 0. */ - base_block = kmalloc(EDID_LENGTH, GFP_KERNEL); + base_block = kzalloc(EDID_LENGTH, GFP_KERNEL); if (!base_block) return 0; -- cgit From 5f8d1e3b6f9b5971f9c06d5846ce00c49e3a8d94 Mon Sep 17 00:00:00 2001 From: Tony O'Brien Date: Wed, 22 Feb 2023 13:52:27 +1300 Subject: hwmon: (adt7475) Display smoothing attributes in correct order Throughout the ADT7475 driver, attributes relating to the temperature sensors are displayed in the order Remote 1, Local, Remote 2. Make temp_st_show() conform to this expectation so that values set by temp_st_store() can be displayed using the correct attribute. Fixes: 8f05bcc33e74 ("hwmon: (adt7475) temperature smoothing") Signed-off-by: Tony O'Brien Link: https://lore.kernel.org/r/20230222005228.158661-2-tony.obrien@alliedtelesis.co.nz Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7475.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 51b3d16c3223..77222c35a38e 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -556,11 +556,11 @@ static ssize_t temp_st_show(struct device *dev, struct device_attribute *attr, val = data->enh_acoustics[0] & 0xf; break; case 1: - val = (data->enh_acoustics[1] >> 4) & 0xf; + val = data->enh_acoustics[1] & 0xf; break; case 2: default: - val = data->enh_acoustics[1] & 0xf; + val = (data->enh_acoustics[1] >> 4) & 0xf; break; } -- cgit From 48e8186870d9d0902e712d601ccb7098cb220688 Mon Sep 17 00:00:00 2001 From: Tony O'Brien Date: Wed, 22 Feb 2023 13:52:28 +1300 Subject: hwmon: (adt7475) Fix masking of hysteresis registers The wrong bits are masked in the hysteresis register; indices 0 and 2 should zero bits [7:4] and preserve bits [3:0], and index 1 should zero bits [3:0] and preserve bits [7:4]. Fixes: 1c301fc5394f ("hwmon: Add a driver for the ADT7475 hardware monitoring chip") Signed-off-by: Tony O'Brien Link: https://lore.kernel.org/r/20230222005228.158661-3-tony.obrien@alliedtelesis.co.nz Signed-off-by: Guenter Roeck --- drivers/hwmon/adt7475.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 77222c35a38e..6e4c92b500b8 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -488,10 +488,10 @@ static ssize_t temp_store(struct device *dev, struct device_attribute *attr, val = (temp - val) / 1000; if (sattr->index != 1) { - data->temp[HYSTERSIS][sattr->index] &= 0xF0; + data->temp[HYSTERSIS][sattr->index] &= 0x0F; data->temp[HYSTERSIS][sattr->index] |= (val & 0xF) << 4; } else { - data->temp[HYSTERSIS][sattr->index] &= 0x0F; + data->temp[HYSTERSIS][sattr->index] &= 0xF0; data->temp[HYSTERSIS][sattr->index] |= (val & 0xF); } -- cgit From 03d0f97fdb45c99cf6f808832db8bd5534e22374 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 3 Mar 2023 10:34:10 +0100 Subject: ASoC: clarify that SND_SOC_IMX_SGTL5000 is the old driver Both SND_SOC_IMX_SGTL5000 and SND_SOC_FSL_ASOC_CARD implement the fsl,imx-audio-sgtl5000 compatible string, which is confusing. It took a little research to find out that the latter is much newer and it is supposed to be the preferred choice since several years. Add a clarification note to avoid wasting time for future readers. Signed-off-by: Luca Ceresoli Link: https://lore.kernel.org/r/20230303093410.357621-1-luca.ceresoli@bootlin.com Signed-off-by: Mark Brown --- sound/soc/fsl/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 614eceda6b9e..33b67db8794e 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -294,6 +294,10 @@ config SND_SOC_IMX_SGTL5000 Say Y if you want to add support for SoC audio on an i.MX board with a sgtl5000 codec. + Note that this is an old driver. Consider enabling + SND_SOC_FSL_ASOC_CARD and SND_SOC_SGTL5000 to use the newer + driver. + config SND_SOC_IMX_SPDIF tristate "SoC Audio support for i.MX boards with S/PDIF" select SND_SOC_IMX_PCM_DMA -- cgit From 65882134bc622a1e57bd5928ac588855ea2e3ddd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 2 Mar 2023 13:29:08 +0100 Subject: ASoC: qcom: q6prm: fix incorrect clk_root passed to ADSP The second to last argument is clk_root (root of the clock), however the code called q6prm_request_lpass_clock() with clk_attr instead (copy-paste error). This effectively was passing value of 1 as root clock which worked on some of the SoCs (e.g. SM8450) but fails on others, depending on the ADSP. For example on SM8550 this "1" as root clock is not accepted and results in errors coming from ADSP. Fixes: 2f20640491ed ("ASoC: qdsp6: qdsp6: q6prm: handle clk disable correctly") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Srinivas Kandagatla Tested-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230302122908.221398-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6prm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6prm.c b/sound/soc/qcom/qdsp6/q6prm.c index 3aa63aac4a68..81554d202658 100644 --- a/sound/soc/qcom/qdsp6/q6prm.c +++ b/sound/soc/qcom/qdsp6/q6prm.c @@ -184,9 +184,9 @@ int q6prm_set_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_ unsigned int freq) { if (freq) - return q6prm_request_lpass_clock(dev, clk_id, clk_attr, clk_attr, freq); + return q6prm_request_lpass_clock(dev, clk_id, clk_attr, clk_root, freq); - return q6prm_release_lpass_clock(dev, clk_id, clk_attr, clk_attr, freq); + return q6prm_release_lpass_clock(dev, clk_id, clk_attr, clk_root, freq); } EXPORT_SYMBOL_GPL(q6prm_set_lpass_clock); -- cgit From e5e7e398f6bb7918dab0612eb6991f7bae95520d Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu Vardhan Rao Date: Sat, 4 Mar 2023 13:37:02 +0530 Subject: ASoC: codecs: tx-macro: Fix for KASAN: slab-out-of-bounds When we run syzkaller we get below Out of Bound. "KASAN: slab-out-of-bounds Read in regcache_flat_read" Below is the backtrace of the issue: dump_backtrace+0x0/0x4c8 show_stack+0x34/0x44 dump_stack_lvl+0xd8/0x118 print_address_description+0x30/0x2d8 kasan_report+0x158/0x198 __asan_report_load4_noabort+0x44/0x50 regcache_flat_read+0x10c/0x110 regcache_read+0xf4/0x180 _regmap_read+0xc4/0x278 _regmap_update_bits+0x130/0x290 regmap_update_bits_base+0xc0/0x15c snd_soc_component_update_bits+0xa8/0x22c snd_soc_component_write_field+0x68/0xd4 tx_macro_digital_mute+0xec/0x140 Actually There is no need to have decimator with 32 bits. By limiting the variable with short type u8 issue is resolved. Signed-off-by: Ravulapati Vishnu Vardhan Rao Link: https://lore.kernel.org/r/20230304080702.609-1-quic_visr@quicinc.com Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-tx-macro.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index bf27bdd5be20..473d3cd39554 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -242,7 +242,7 @@ enum { struct tx_mute_work { struct tx_macro *tx; - u32 decimator; + u8 decimator; struct delayed_work dwork; }; @@ -635,7 +635,7 @@ exit: return 0; } -static bool is_amic_enabled(struct snd_soc_component *component, int decimator) +static bool is_amic_enabled(struct snd_soc_component *component, u8 decimator) { u16 adc_mux_reg, adc_reg, adc_n; @@ -849,7 +849,7 @@ static int tx_macro_enable_dec(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - unsigned int decimator; + u8 decimator; u16 tx_vol_ctl_reg, dec_cfg_reg, hpf_gate_reg, tx_gain_ctl_reg; u8 hpf_cut_off_freq; int hpf_delay = TX_MACRO_DMIC_HPF_DELAY_MS; @@ -1064,7 +1064,8 @@ static int tx_macro_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_component *component = dai->component; - u32 decimator, sample_rate; + u32 sample_rate; + u8 decimator; int tx_fs_rate; struct tx_macro *tx = snd_soc_component_get_drvdata(component); @@ -1128,7 +1129,7 @@ static int tx_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream) { struct snd_soc_component *component = dai->component; struct tx_macro *tx = snd_soc_component_get_drvdata(component); - u16 decimator; + u8 decimator; /* active decimator not set yet */ if (tx->active_decimator[dai->id] == -1) -- cgit From d0dc41119905f740e8d5594adce277f7c0de8c92 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Wed, 16 Nov 2022 11:11:36 +0800 Subject: cifs: Move the in_send statistic to __smb_send_rqst() When send SMB_COM_NT_CANCEL and RFC1002_SESSION_REQUEST, the in_send statistic was lost. Let's move the in_send statistic to the send function to avoid this scenario. Fixes: 7ee1af765dfa ("[CIFS]") Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French --- fs/cifs/transport.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b42050c68e6c..24bdd5f4d3bc 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -278,7 +278,7 @@ static int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst) { - int rc = 0; + int rc; struct kvec *iov; int n_vec; unsigned int send_length = 0; @@ -289,6 +289,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, struct msghdr smb_msg = {}; __be32 rfc1002_marker; + cifs_in_send_inc(server); if (cifs_rdma_enabled(server)) { /* return -EAGAIN when connecting or reconnecting */ rc = -EAGAIN; @@ -297,14 +298,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, goto smbd_done; } + rc = -EAGAIN; if (ssocket == NULL) - return -EAGAIN; + goto out; + rc = -ERESTARTSYS; if (fatal_signal_pending(current)) { cifs_dbg(FYI, "signal pending before send request\n"); - return -ERESTARTSYS; + goto out; } + rc = 0; /* cork the socket */ tcp_sock_set_cork(ssocket->sk, true); @@ -407,7 +411,8 @@ smbd_done: rc); else if (rc > 0) rc = 0; - +out: + cifs_in_send_dec(server); return rc; } @@ -826,9 +831,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, * I/O response may come back and free the mid entry on another thread. */ cifs_save_when_sent(mid); - cifs_in_send_inc(server); rc = smb_send_rqst(server, 1, rqst, flags); - cifs_in_send_dec(server); if (rc < 0) { revert_current_mid(server, mid->credits); @@ -1144,9 +1147,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, else midQ[i]->callback = cifs_compound_last_callback; } - cifs_in_send_inc(server); rc = smb_send_rqst(server, num_rqst, rqst, flags); - cifs_in_send_dec(server); for (i = 0; i < num_rqst; i++) cifs_save_when_sent(midQ[i]); @@ -1396,9 +1397,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, midQ->mid_state = MID_REQUEST_SUBMITTED; - cifs_in_send_inc(server); rc = smb_send(server, in_buf, len); - cifs_in_send_dec(server); cifs_save_when_sent(midQ); if (rc < 0) @@ -1539,9 +1538,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, } midQ->mid_state = MID_REQUEST_SUBMITTED; - cifs_in_send_inc(server); rc = smb_send(server, in_buf, len); - cifs_in_send_dec(server); cifs_save_when_sent(midQ); if (rc < 0) -- cgit From fd4334a06d452ce89a0bb831b03130c51331d927 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 31 Jan 2023 11:35:58 +0100 Subject: arm64: dts: freescale: imx8-ss-lsio: Fix flexspi clock order The correct clock order is "fspi_en" and "fspi". As they are identical just reordering the names is sufficient. Fixes: 6276d66984e9 ("arm64: dts: imx8dxl: add flexspi0 support") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 1f3d225e64ec..06b94bbc2b97 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -117,7 +117,7 @@ lsio_subsys: bus@5d000000 { interrupts = ; clocks = <&clk IMX_SC_R_FSPI_0 IMX_SC_PM_CLK_PER>, <&clk IMX_SC_R_FSPI_0 IMX_SC_PM_CLK_PER>; - clock-names = "fspi", "fspi_en"; + clock-names = "fspi_en", "fspi"; power-domains = <&pd IMX_SC_R_FSPI_0>; status = "disabled"; }; -- cgit From 916508c30e22f658f12dc736f8198d8a096cb24d Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 7 Feb 2023 14:10:20 +0100 Subject: Revert "arm64: dts: ls1028a: sl28: get MAC addresses from VPD" With commit b203e6f1e833 ("arm64: dts: ls1028a: sl28: get MAC addresses from VPD"), the network adapter now depends on the nvmem device to be present, which isn't the case and thus breaks networking on this board. Revert it. Fixes: b203e6f1e833 ("arm64: dts: ls1028a: sl28: get MAC addresses from VPD") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- .../dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts | 12 ------------ .../dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts | 2 -- .../dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts | 8 -------- .../dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts | 2 -- .../boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts | 17 ----------------- 5 files changed, 41 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts index af9194eca556..73eb6061c73e 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-kbox-a-230-ls.dts @@ -56,14 +56,10 @@ }; &enetc_port2 { - nvmem-cells = <&base_mac_address 2>; - nvmem-cell-names = "mac-address"; status = "okay"; }; &enetc_port3 { - nvmem-cells = <&base_mac_address 3>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -84,8 +80,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy0>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 4>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -94,8 +88,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy1>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 5>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -104,8 +96,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy2>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 6>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -114,8 +104,6 @@ managed = "in-band-status"; phy-handle = <&qsgmii_phy3>; phy-mode = "qsgmii"; - nvmem-cells = <&base_mac_address 7>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts index 1f34c7553459..7cd29ab970d9 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts @@ -55,7 +55,5 @@ &enetc_port1 { phy-handle = <&phy0>; phy-mode = "rgmii-id"; - nvmem-cells = <&base_mac_address 0>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts index aac41192caa1..113b1df74bf8 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts @@ -36,14 +36,10 @@ }; &enetc_port2 { - nvmem-cells = <&base_mac_address 2>; - nvmem-cell-names = "mac-address"; status = "okay"; }; &enetc_port3 { - nvmem-cells = <&base_mac_address 3>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -56,8 +52,6 @@ managed = "in-band-status"; phy-handle = <&phy0>; phy-mode = "sgmii"; - nvmem-cells = <&base_mac_address 0>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -66,8 +60,6 @@ managed = "in-band-status"; phy-handle = <&phy1>; phy-mode = "sgmii"; - nvmem-cells = <&base_mac_address 1>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts index a4421db3784e..9b5e92fb753e 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts @@ -43,7 +43,5 @@ &enetc_port1 { phy-handle = <&phy1>; phy-mode = "rgmii-id"; - nvmem-cells = <&base_mac_address 1>; - nvmem-cell-names = "mac-address"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index 8b65af4a7147..4ab17b984b03 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -92,8 +92,6 @@ phy-handle = <&phy0>; phy-mode = "sgmii"; managed = "in-band-status"; - nvmem-cells = <&base_mac_address 0>; - nvmem-cell-names = "mac-address"; status = "okay"; }; @@ -156,21 +154,6 @@ label = "bootloader environment"; }; }; - - otp-1 { - compatible = "user-otp"; - - nvmem-layout { - compatible = "kontron,sl28-vpd"; - - serial_number: serial-number { - }; - - base_mac_address: base-mac-address { - #nvmem-cell-cells = <1>; - }; - }; - }; }; }; -- cgit From 5c8cf1664f288098a971a1d1e65716a2b6a279e1 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Fri, 3 Mar 2023 12:33:12 +0000 Subject: drm/meson: fix 1px pink line on GXM when scaling video overlay Playing media with a resolution smaller than the crtc size requires the video overlay to be scaled for output and GXM boards display a 1px pink line on the bottom of the scaled overlay. Comparing with the downstream vendor driver revealed VPP_DUMMY_DATA not being set [0]. Setting VPP_DUMMY_DATA prevents the 1px pink line from being seen. [0] https://github.com/endlessm/linux-s905x/blob/master/drivers/amlogic/amports/video.c#L7869 Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Suggested-by: Martin Blumenstingl Signed-off-by: Christian Hewitt Acked-by: Martin Blumenstingl Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230303123312.155164-1-christianshewitt@gmail.com --- drivers/gpu/drm/meson/meson_vpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/meson/meson_vpp.c b/drivers/gpu/drm/meson/meson_vpp.c index 154837688ab0..5df1957c8e41 100644 --- a/drivers/gpu/drm/meson/meson_vpp.c +++ b/drivers/gpu/drm/meson/meson_vpp.c @@ -100,6 +100,8 @@ void meson_vpp_init(struct meson_drm *priv) priv->io_base + _REG(VPP_DOLBY_CTRL)); writel_relaxed(0x1020080, priv->io_base + _REG(VPP_DUMMY_DATA1)); + writel_relaxed(0x42020, + priv->io_base + _REG(VPP_DUMMY_DATA)); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) writel_relaxed(0xf, priv->io_base + _REG(DOLBY_PATH_CTRL)); -- cgit From 8ab5059dc4f4c34325eba6270ef12a4ab1386019 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 23 Jan 2023 18:07:07 +0300 Subject: firmware: arm_scmi: Clean up a return statement in scmi_probe The comments say "enabled" where "disabled" is intended. Also it's cleaner to return zero explicitly instead of ret. Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Y86im5M49p3ePGxj@kili Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index d21c7eafd641..703f16ef3953 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2739,8 +2739,8 @@ static int scmi_probe(struct platform_device *pdev) if (ret) goto clear_dev_req_notifier; - /* Bail out anyway when coex enabled */ - return ret; + /* Bail out anyway when coex disabled. */ + return 0; } /* Coex enabled, carry on in any case. */ -- cgit From 6bed395d7db2c039c0ef4123a379e27c528a3357 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Feb 2023 18:17:06 +0300 Subject: firmware: arm_scmi: Return a literal instead of a variable In this context "return scmi_dev;" and "return NULL;" are equivalent but it is more readable to return a literal. Signed-off-by: Dan Carpenter Reviewed-by: Cristian Marussi Link: https://lore.kernel.org/r/Y/Yx8pOdf8rNhPVe@kili Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 73140b854b31..ac306ca48b07 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -436,7 +436,7 @@ struct scmi_device *scmi_device_create(struct device_node *np, /* Nothing to do. */ if (!phead) { mutex_unlock(&scmi_requested_devices_mtx); - return scmi_dev; + return NULL; } /* Walk the list of requested devices for protocol and create them */ -- cgit From 418a406d92cc276ddf81d4223271af1ae09fa5af Mon Sep 17 00:00:00 2001 From: Ye Xingchen Date: Fri, 10 Feb 2023 15:20:07 +0800 Subject: firmware: arm_scmi: Remove duplicate include header inclusion linux/of.h is included more than once, just remove the duplicate include header inclusion. Signed-off-by: Ye Xingchen Link: https://lore.kernel.org/r/202302101520071730986@zte.com.cn Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index ac306ca48b07..c15928b8c5cc 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -14,7 +14,6 @@ #include #include #include -#include #include "common.h" -- cgit From b2b76e977fc6bc38e6a4dedb62b34bc90cc6ce97 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 23 Feb 2023 15:23:30 +0000 Subject: firmware: arm_scmi: Fix raw coexistence mode behaviour on failure path When SCMI raw coexistence mode is enabled make the core stack probe successfully even when the initial base protocol exchanges with the platform/server failed. This behaviour enables the system to boot with a broken regular SCMI stack but with a fully functional and accessible SCMI raw debugfs interface that can be used to further debug the issue. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230223152330.2707260-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 703f16ef3953..15a431639d82 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2657,6 +2657,7 @@ static int scmi_probe(struct platform_device *pdev) struct scmi_handle *handle; const struct scmi_desc *desc; struct scmi_info *info; + bool coex = IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT_COEX); struct device *dev = &pdev->dev; struct device_node *child, *np = dev->of_node; @@ -2731,9 +2732,6 @@ static int scmi_probe(struct platform_device *pdev) dev_warn(dev, "Failed to setup SCMI debugfs.\n"); if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { - bool coex = - IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT_COEX); - ret = scmi_debugfs_raw_mode_setup(info); if (!coex) { if (ret) @@ -2764,6 +2762,8 @@ static int scmi_probe(struct platform_device *pdev) ret = scmi_protocol_acquire(handle, SCMI_PROTOCOL_BASE); if (ret) { dev_err(dev, "unable to communicate with SCMI\n"); + if (coex) + return 0; goto notification_exit; } -- cgit From 7af9da8ce8f9a16221ecd8ba4280582f5bd452fc Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Tue, 14 Feb 2023 13:13:50 -0600 Subject: thunderbolt: Add quirk to disable CLx Add QUIRK_NO_CLX to disable the CLx state for hardware which doesn't supports it. AMD Yellow Carp and Pink Sardine don't support CLx state, hence disabling it using QUIRK_NO_CLX. Cc: stable@vger.kernel.org Signed-off-by: Sanjay R Mehta Signed-off-by: Basavaraj Natikar [mw: added debug log when the quirk is run] Signed-off-by: Mika Westerberg --- drivers/thunderbolt/quirks.c | 13 +++++++++++++ drivers/thunderbolt/tb.h | 11 ++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index b5f2ec79c4d6..ae28a03fa890 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -20,6 +20,12 @@ static void quirk_dp_credit_allocation(struct tb_switch *sw) } } +static void quirk_clx_disable(struct tb_switch *sw) +{ + sw->quirks |= QUIRK_NO_CLX; + tb_sw_dbg(sw, "disabling CL states\n"); +} + struct tb_quirk { u16 hw_vendor_id; u16 hw_device_id; @@ -37,6 +43,13 @@ static const struct tb_quirk tb_quirks[] = { * DP buffers. */ { 0x8087, 0x0b26, 0x0000, 0x0000, quirk_dp_credit_allocation }, + /* + * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. + */ + { 0x0438, 0x0208, 0x0000, 0x0000, quirk_clx_disable }, + { 0x0438, 0x0209, 0x0000, 0x0000, quirk_clx_disable }, + { 0x0438, 0x020a, 0x0000, 0x0000, quirk_clx_disable }, + { 0x0438, 0x020b, 0x0000, 0x0000, quirk_clx_disable }, }; /** diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index cbb20a277346..64968c162ec7 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -23,6 +23,11 @@ #define NVM_MAX_SIZE SZ_512K #define NVM_DATA_DWORDS 16 +/* Keep link controller awake during update */ +#define QUIRK_FORCE_POWER_LINK_CONTROLLER BIT(0) +/* Disable CLx if not supported */ +#define QUIRK_NO_CLX BIT(1) + /** * struct tb_nvm - Structure holding NVM information * @dev: Owner of the NVM @@ -1019,6 +1024,9 @@ static inline bool tb_switch_is_clx_enabled(const struct tb_switch *sw, */ static inline bool tb_switch_is_clx_supported(const struct tb_switch *sw) { + if (sw->quirks & QUIRK_NO_CLX) + return false; + return tb_switch_is_usb4(sw) || tb_switch_is_titan_ridge(sw); } @@ -1291,9 +1299,6 @@ struct usb4_port *usb4_port_device_add(struct tb_port *port); void usb4_port_device_remove(struct usb4_port *usb4); int usb4_port_device_resume(struct usb4_port *usb4); -/* Keep link controller awake during update */ -#define QUIRK_FORCE_POWER_LINK_CONTROLLER BIT(0) - void tb_check_quirks(struct tb_switch *sw); #ifdef CONFIG_ACPI -- cgit From f77ebdda0ee652124061c2ac42399bb6c367e729 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jan 2023 02:22:19 +0200 Subject: interconnect: qcom: osm-l3: fix icc_onecell_data allocation This is a struct with a trailing zero-length array of icc_node pointers but it's allocated as if it were a single array of icc_nodes instead. Fortunately this overallocates memory rather then allocating less memory than required. Fix by replacing devm_kcalloc() with devm_kzalloc() and struct_size() macro. Fixes: 5bc9900addaf ("interconnect: qcom: Add OSM L3 interconnect provider support") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230105002221.1416479-2-dmitry.baryshkov@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/osm-l3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index 5fa171087425..1bc01ff6e02a 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -236,7 +236,7 @@ static int qcom_osm_l3_probe(struct platform_device *pdev) qnodes = desc->nodes; num_nodes = desc->num_nodes; - data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL); + data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes), GFP_KERNEL); if (!data) return -ENOMEM; -- cgit From 87e8fab1917a2b3f6e3dedfd1cdf22a1416e6676 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jan 2023 02:22:20 +0200 Subject: interconnect: qcom: sm8450: switch to qcom_icc_rpmh_* function Change sm8450 interconnect driver to use generic qcom_icc_rpmh_* functions rather than embedding a copy of thema. This also fixes an overallocation of memory for icc_onecell_data structure. Fixes: fafc114a468e ("interconnect: qcom: Add SM8450 interconnect provider driver") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230105002221.1416479-3-dmitry.baryshkov@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sm8450.c | 98 +------------------------------------- 1 file changed, 2 insertions(+), 96 deletions(-) diff --git a/drivers/interconnect/qcom/sm8450.c b/drivers/interconnect/qcom/sm8450.c index e3a12e3d6e06..2d7a8e7b85ec 100644 --- a/drivers/interconnect/qcom/sm8450.c +++ b/drivers/interconnect/qcom/sm8450.c @@ -1844,100 +1844,6 @@ static const struct qcom_icc_desc sm8450_system_noc = { .num_bcms = ARRAY_SIZE(system_noc_bcms), }; -static int qnoc_probe(struct platform_device *pdev) -{ - const struct qcom_icc_desc *desc; - struct icc_onecell_data *data; - struct icc_provider *provider; - struct qcom_icc_node * const *qnodes; - struct qcom_icc_provider *qp; - struct icc_node *node; - size_t num_nodes, i; - int ret; - - desc = device_get_match_data(&pdev->dev); - if (!desc) - return -EINVAL; - - qnodes = desc->nodes; - num_nodes = desc->num_nodes; - - qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL); - if (!qp) - return -ENOMEM; - - data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL); - if (!data) - return -ENOMEM; - - provider = &qp->provider; - provider->dev = &pdev->dev; - provider->set = qcom_icc_set; - provider->pre_aggregate = qcom_icc_pre_aggregate; - provider->aggregate = qcom_icc_aggregate; - provider->xlate_extended = qcom_icc_xlate_extended; - INIT_LIST_HEAD(&provider->nodes); - provider->data = data; - - qp->dev = &pdev->dev; - qp->bcms = desc->bcms; - qp->num_bcms = desc->num_bcms; - - qp->voter = of_bcm_voter_get(qp->dev, NULL); - if (IS_ERR(qp->voter)) - return PTR_ERR(qp->voter); - - ret = icc_provider_add(provider); - if (ret) { - dev_err(&pdev->dev, "error adding interconnect provider\n"); - return ret; - } - - for (i = 0; i < qp->num_bcms; i++) - qcom_icc_bcm_init(qp->bcms[i], &pdev->dev); - - for (i = 0; i < num_nodes; i++) { - size_t j; - - if (!qnodes[i]) - continue; - - node = icc_node_create(qnodes[i]->id); - if (IS_ERR(node)) { - ret = PTR_ERR(node); - goto err; - } - - node->name = qnodes[i]->name; - node->data = qnodes[i]; - icc_node_add(node, provider); - - for (j = 0; j < qnodes[i]->num_links; j++) - icc_link_create(node, qnodes[i]->links[j]); - - data->nodes[i] = node; - } - data->num_nodes = num_nodes; - - platform_set_drvdata(pdev, qp); - - return 0; -err: - icc_nodes_remove(provider); - icc_provider_del(provider); - return ret; -} - -static int qnoc_remove(struct platform_device *pdev) -{ - struct qcom_icc_provider *qp = platform_get_drvdata(pdev); - - icc_nodes_remove(&qp->provider); - icc_provider_del(&qp->provider); - - return 0; -} - static const struct of_device_id qnoc_of_match[] = { { .compatible = "qcom,sm8450-aggre1-noc", .data = &sm8450_aggre1_noc}, @@ -1966,8 +1872,8 @@ static const struct of_device_id qnoc_of_match[] = { MODULE_DEVICE_TABLE(of, qnoc_of_match); static struct platform_driver qnoc_driver = { - .probe = qnoc_probe, - .remove = qnoc_remove, + .probe = qcom_icc_rpmh_probe, + .remove = qcom_icc_rpmh_remove, .driver = { .name = "qnoc-sm8450", .of_match_table = qnoc_of_match, -- cgit From 0d00cd114f20e4a6db37e4b08435c27acc1d1db0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jan 2023 02:22:21 +0200 Subject: interconnect: qcom: sm8550: switch to qcom_icc_rpmh_* function Change sm8550 interconnect driver to use generic qcom_icc_rpmh_* functions rather than embedding a copy of thema. This also fixes an overallocation of memory for icc_onecell_data structure. Fixes: e6f0d6a30f73 ("interconnect: qcom: Add SM8550 interconnect provider driver") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230105002221.1416479-4-dmitry.baryshkov@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sm8550.c | 99 +------------------------------------- 1 file changed, 2 insertions(+), 97 deletions(-) diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c index 54fa027ab961..d823ba988ef6 100644 --- a/drivers/interconnect/qcom/sm8550.c +++ b/drivers/interconnect/qcom/sm8550.c @@ -2165,101 +2165,6 @@ static const struct qcom_icc_desc sm8550_system_noc = { .num_bcms = ARRAY_SIZE(system_noc_bcms), }; -static int qnoc_probe(struct platform_device *pdev) -{ - const struct qcom_icc_desc *desc; - struct icc_onecell_data *data; - struct icc_provider *provider; - struct qcom_icc_node * const *qnodes; - struct qcom_icc_provider *qp; - struct icc_node *node; - size_t num_nodes, i; - int ret; - - desc = device_get_match_data(&pdev->dev); - if (!desc) - return -EINVAL; - - qnodes = desc->nodes; - num_nodes = desc->num_nodes; - - qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL); - if (!qp) - return -ENOMEM; - - data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL); - if (!data) - return -ENOMEM; - - provider = &qp->provider; - provider->dev = &pdev->dev; - provider->set = qcom_icc_set; - provider->pre_aggregate = qcom_icc_pre_aggregate; - provider->aggregate = qcom_icc_aggregate; - provider->xlate_extended = qcom_icc_xlate_extended; - INIT_LIST_HEAD(&provider->nodes); - provider->data = data; - - qp->dev = &pdev->dev; - qp->bcms = desc->bcms; - qp->num_bcms = desc->num_bcms; - - qp->voter = of_bcm_voter_get(qp->dev, NULL); - if (IS_ERR(qp->voter)) - return PTR_ERR(qp->voter); - - ret = icc_provider_add(provider); - if (ret) { - dev_err_probe(&pdev->dev, ret, - "error adding interconnect provider\n"); - return ret; - } - - for (i = 0; i < qp->num_bcms; i++) - qcom_icc_bcm_init(qp->bcms[i], &pdev->dev); - - for (i = 0; i < num_nodes; i++) { - size_t j; - - if (!qnodes[i]) - continue; - - node = icc_node_create(qnodes[i]->id); - if (IS_ERR(node)) { - ret = PTR_ERR(node); - goto err; - } - - node->name = qnodes[i]->name; - node->data = qnodes[i]; - icc_node_add(node, provider); - - for (j = 0; j < qnodes[i]->num_links; j++) - icc_link_create(node, qnodes[i]->links[j]); - - data->nodes[i] = node; - } - data->num_nodes = num_nodes; - - platform_set_drvdata(pdev, qp); - - return 0; -err: - icc_nodes_remove(provider); - icc_provider_del(provider); - return ret; -} - -static int qnoc_remove(struct platform_device *pdev) -{ - struct qcom_icc_provider *qp = platform_get_drvdata(pdev); - - icc_nodes_remove(&qp->provider); - icc_provider_del(&qp->provider); - - return 0; -} - static const struct of_device_id qnoc_of_match[] = { { .compatible = "qcom,sm8550-aggre1-noc", .data = &sm8550_aggre1_noc}, @@ -2294,8 +2199,8 @@ static const struct of_device_id qnoc_of_match[] = { MODULE_DEVICE_TABLE(of, qnoc_of_match); static struct platform_driver qnoc_driver = { - .probe = qnoc_probe, - .remove = qnoc_remove, + .probe = qcom_icc_rpmh_probe, + .remove = qcom_icc_rpmh_remove, .driver = { .name = "qnoc-sm8550", .of_match_table = qnoc_of_match, -- cgit From ceac10c83b330680cc01ceaaab86cd49f4f30d81 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Wed, 22 Feb 2023 00:10:14 +0100 Subject: ARM: 9290/1: uaccess: Fix KASAN false-positives __copy_to_user_memcpy() and __clear_user_memset() had been calling memcpy() and memset() respectively, leading to false-positive KASAN reports when starting userspace: [ 10.707901] Run /init as init process [ 10.731892] process '/bin/busybox' started with executable stack [ 10.745234] ================================================================== [ 10.745796] BUG: KASAN: user-memory-access in __clear_user_memset+0x258/0x3ac [ 10.747260] Write of size 2687 at addr 000de581 by task init/1 Use __memcpy() and __memset() instead to allow userspace access, which is of course the intent of these functions. Signed-off-by: Andrew Jeffery Signed-off-by: Zev Weiss Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/lib/uaccess_with_memcpy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 14eecaaf295f..e4c2677cc1e9 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -116,7 +116,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) tocopy = n; ua_flags = uaccess_save_and_enable(); - memcpy((void *)to, from, tocopy); + __memcpy((void *)to, from, tocopy); uaccess_restore(ua_flags); to += tocopy; from += tocopy; @@ -178,7 +178,7 @@ __clear_user_memset(void __user *addr, unsigned long n) tocopy = n; ua_flags = uaccess_save_and_enable(); - memset((void *)addr, 0, tocopy); + __memset((void *)addr, 0, tocopy); uaccess_restore(ua_flags); addr += tocopy; n -= tocopy; -- cgit From d16c893425d07ada1fdd817ec06d322efcf69480 Mon Sep 17 00:00:00 2001 From: Amadeusz Sławiński Date: Fri, 3 Mar 2023 14:48:50 +0100 Subject: ASoC: Intel: avs: max98357a: Explicitly define codec format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit max98357a is speaker codec configured in 48000/2/S16_LE format regardless of front end format, so force it to be so. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230303134854.2277146-2-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/max98357a.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c index 921f42caf7e0..183123d08c5a 100644 --- a/sound/soc/intel/avs/boards/max98357a.c +++ b/sound/soc/intel/avs/boards/max98357a.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,26 @@ static const struct snd_soc_dapm_route card_base_routes[] = { { "Spk", NULL, "Speaker" }, }; +static int +avs_max98357a_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params) +{ + struct snd_interval *rate, *channels; + struct snd_mask *fmt; + + rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); + channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); + fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + + /* The ADSP will convert the FE rate to 48k, stereo */ + rate->min = rate->max = 48000; + channels->min = channels->max = 2; + + /* set SSP0 to 16 bit */ + snd_mask_none(fmt); + snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S16_LE); + return 0; +} + static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port, struct snd_soc_dai_link **dai_link) { @@ -55,6 +76,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in dl->num_platforms = 1; dl->id = 0; dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS; + dl->be_hw_params_fixup = avs_max98357a_be_fixup; dl->nonatomic = 1; dl->no_pcm = 1; dl->dpcm_playback = 1; -- cgit From 61f368624fe4d0c25c6e9c917574b8ace51d776e Mon Sep 17 00:00:00 2001 From: Amadeusz Sławiński Date: Fri, 3 Mar 2023 14:48:51 +0100 Subject: ASoC: Intel: avs: da7219: Explicitly define codec format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit da7219 is headset codec configured in 48000/2/S24_LE format regardless of front end format, so force it to be so. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230303134854.2277146-3-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/da7219.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c index acd43b6108e9..1a1d572cc1d0 100644 --- a/sound/soc/intel/avs/boards/da7219.c +++ b/sound/soc/intel/avs/boards/da7219.c @@ -117,6 +117,26 @@ static void avs_da7219_codec_exit(struct snd_soc_pcm_runtime *rtd) snd_soc_component_set_jack(asoc_rtd_to_codec(rtd, 0)->component, NULL, NULL); } +static int +avs_da7219_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params) +{ + struct snd_interval *rate, *channels; + struct snd_mask *fmt; + + rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); + channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); + fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + + /* The ADSP will convert the FE rate to 48k, stereo */ + rate->min = rate->max = 48000; + channels->min = channels->max = 2; + + /* set SSP0 to 24 bit */ + snd_mask_none(fmt); + snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE); + return 0; +} + static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port, struct snd_soc_dai_link **dai_link) { @@ -148,6 +168,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in dl->num_platforms = 1; dl->id = 0; dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS; + dl->be_hw_params_fixup = avs_da7219_be_fixup; dl->init = avs_da7219_codec_init; dl->exit = avs_da7219_codec_exit; dl->nonatomic = 1; -- cgit From d24dbc865c2bd5946bef62bb862a65df092dfc79 Mon Sep 17 00:00:00 2001 From: Amadeusz Sławiński Date: Fri, 3 Mar 2023 14:48:52 +0100 Subject: ASoC: Intel: avs: rt5682: Explicitly define codec format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rt5682 is headset codec configured in 48000/2/S24_LE format regardless of front end format, so force it to be so. Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230303134854.2277146-4-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/rt5682.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c index 473e9fe5d0bf..b2c2ba93dcb5 100644 --- a/sound/soc/intel/avs/boards/rt5682.c +++ b/sound/soc/intel/avs/boards/rt5682.c @@ -169,6 +169,27 @@ static const struct snd_soc_ops avs_rt5682_ops = { .hw_params = avs_rt5682_hw_params, }; +static int +avs_rt5682_be_fixup(struct snd_soc_pcm_runtime *runtime, struct snd_pcm_hw_params *params) +{ + struct snd_interval *rate, *channels; + struct snd_mask *fmt; + + rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); + channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); + fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + + /* The ADSP will convert the FE rate to 48k, stereo */ + rate->min = rate->max = 48000; + channels->min = channels->max = 2; + + /* set SSPN to 24 bit */ + snd_mask_none(fmt); + snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE); + + return 0; +} + static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port, struct snd_soc_dai_link **dai_link) { @@ -201,6 +222,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in dl->id = 0; dl->init = avs_rt5682_codec_init; dl->exit = avs_rt5682_codec_exit; + dl->be_hw_params_fixup = avs_rt5682_be_fixup; dl->ops = &avs_rt5682_ops; dl->nonatomic = 1; dl->no_pcm = 1; -- cgit From 933de2d127281731166cf2880fa1e23c5a0f7faa Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 3 Mar 2023 14:48:53 +0100 Subject: ASoC: Intel: avs: ssm4567: Remove nau8825 bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the nau8825 clock control got into the ssm4567, remove it. Signed-off-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230303134854.2277146-5-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/ssm4567.c | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c index c5db69612762..2b7f5ad92aca 100644 --- a/sound/soc/intel/avs/boards/ssm4567.c +++ b/sound/soc/intel/avs/boards/ssm4567.c @@ -15,7 +15,6 @@ #include #include "../../../codecs/nau8825.h" -#define SKL_NUVOTON_CODEC_DAI "nau8825-hifi" #define SKL_SSM_CODEC_DAI "ssm4567-hifi" static struct snd_soc_codec_conf card_codec_conf[] = { @@ -34,41 +33,11 @@ static const struct snd_kcontrol_new card_controls[] = { SOC_DAPM_PIN_SWITCH("Right Speaker"), }; -static int -platform_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *control, int event) -{ - struct snd_soc_dapm_context *dapm = w->dapm; - struct snd_soc_card *card = dapm->card; - struct snd_soc_dai *codec_dai; - int ret; - - codec_dai = snd_soc_card_get_codec_dai(card, SKL_NUVOTON_CODEC_DAI); - if (!codec_dai) { - dev_err(card->dev, "Codec dai not found\n"); - return -EINVAL; - } - - if (SND_SOC_DAPM_EVENT_ON(event)) { - ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000, - SND_SOC_CLOCK_IN); - if (ret < 0) - dev_err(card->dev, "set sysclk err = %d\n", ret); - } else { - ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN); - if (ret < 0) - dev_err(card->dev, "set sysclk err = %d\n", ret); - } - - return ret; -} - static const struct snd_soc_dapm_widget card_widgets[] = { SND_SOC_DAPM_SPK("Left Speaker", NULL), SND_SOC_DAPM_SPK("Right Speaker", NULL), SND_SOC_DAPM_SPK("DP1", NULL), SND_SOC_DAPM_SPK("DP2", NULL), - SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, platform_clock_control, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), }; static const struct snd_soc_dapm_route card_base_routes[] = { -- cgit From 6206b2e787da2ed567922c37bb588a44f6fb6705 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 3 Mar 2023 14:48:54 +0100 Subject: ASoC: Intel: avs: nau8825: Adjust clock control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal clock shall be adjusted also in cases when DAPM event other than 'ON' is triggered. Signed-off-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20230303134854.2277146-6-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/boards/nau8825.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c index b31fa931ba8b..b69fc5567135 100644 --- a/sound/soc/intel/avs/boards/nau8825.c +++ b/sound/soc/intel/avs/boards/nau8825.c @@ -33,15 +33,15 @@ avs_nau8825_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *co return -EINVAL; } - if (!SND_SOC_DAPM_EVENT_ON(event)) { + if (SND_SOC_DAPM_EVENT_ON(event)) + ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000, + SND_SOC_CLOCK_IN); + else ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN); - if (ret < 0) { - dev_err(card->dev, "set sysclk err = %d\n", ret); - return ret; - } - } + if (ret < 0) + dev_err(card->dev, "Set sysclk failed: %d\n", ret); - return 0; + return ret; } static const struct snd_kcontrol_new card_controls[] = { -- cgit From c28cd1f3433c7e339315d1ddacaeacf0fdfbe252 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Mar 2023 17:46:38 -0800 Subject: clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro We already mark fwnodes as initialized when they are registered as clock providers. We do this so that fw_devlink can tell when a clock driver doesn't use the driver core framework to probe/initialize its device. This ensures fw_devlink doesn't block the consumers of such a clock provider indefinitely. However, some users of CLK_OF_DECLARE() macros don't use the same node that matches the macro as the node for the clock provider, but they initialize the entire node. To cover these cases, also mark the nodes that match the macros as initialized when the init callback function is called. An example of this is "stericsson,u8500-clks" that's handled using CLK_OF_DECLARE() and looks something like this: clocks { compatible = "stericsson,u8500-clks"; prcmu_clk: prcmu-clock { #clock-cells = <1>; }; prcc_pclk: prcc-periph-clock { #clock-cells = <2>; }; prcc_kclk: prcc-kernel-clock { #clock-cells = <2>; }; prcc_reset: prcc-reset-controller { #reset-cells = <2>; }; ... }; This patch makes sure that "clocks" is marked as initialized so that fw_devlink knows that all nodes under it have been initialized. If the driver creates struct devices for some of the subnodes, fw_devlink is smart enough to know to wait for those devices to probe, so no special handling is required for those cases. Cc: Greg Kroah-Hartman Reported-by: Linus Walleij Link: https://lore.kernel.org/lkml/CACRpkdamxDX6EBVjKX5=D3rkHp17f5pwGdBVhzFU90-0MHY6dQ@mail.gmail.com/ Fixes: 4a032827daa8 ("of: property: Simplify of_link_to_phandle()") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20230302014639.297514-1-saravanak@google.com Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 842e72a5348f..c9f5276006a0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1363,7 +1363,13 @@ struct clk_hw_onecell_data { struct clk_hw *hws[]; }; -#define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn) +#define CLK_OF_DECLARE(name, compat, fn) \ + static void __init name##_of_clk_init_declare(struct device_node *np) \ + { \ + fn(np); \ + fwnode_dev_initialized(of_fwnode_handle(np), true); \ + } \ + OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) /* * Use this macro when you have a driver that requires two initialization -- cgit From 9d941b8abf863751c1c634d96539c3fab220a99c Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 22 Feb 2023 12:14:37 +0000 Subject: kbuild, clk: bcm2835: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: Michael Turquette Cc: Stephen Boyd Cc: Florian Fainelli Cc: Ray Jui Cc: Scott Branden Cc: linux-clk@vger.kernel.org Cc: linux-rpi-kernel@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230222121453.91915-12-nick.alcock@oracle.com Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2835-aux.c | 1 - drivers/clk/bcm/clk-bcm2835.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c index 290a2846a86b..0fafa5cba442 100644 --- a/drivers/clk/bcm/clk-bcm2835-aux.c +++ b/drivers/clk/bcm/clk-bcm2835-aux.c @@ -69,4 +69,3 @@ builtin_platform_driver(bcm2835_aux_clk_driver); MODULE_AUTHOR("Eric Anholt "); MODULE_DESCRIPTION("BCM2835 auxiliary peripheral clock driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index e74fe6219d14..8dc476ef5bf9 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -2350,4 +2350,3 @@ builtin_platform_driver(bcm2835_clk_driver); MODULE_AUTHOR("Eric Anholt "); MODULE_DESCRIPTION("BCM2835 clock driver"); -MODULE_LICENSE("GPL"); -- cgit From 94511ebc6810142eb36c8b935e1713a15db94d28 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Wed, 22 Feb 2023 12:14:38 +0000 Subject: kbuild, clk: remove MODULE_LICENSE in non-modules Since commit 8b41fc4454e ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. So remove it in the files in this commit, none of which can be built as modules. Signed-off-by: Nick Alcock Suggested-by: Luis Chamberlain Cc: Luis Chamberlain Cc: linux-modules@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Hitomi Hasegawa Cc: Michael Turquette Cc: Stephen Boyd Cc: linux-clk@vger.kernel.org Link: https://lore.kernel.org/r/20230222121453.91915-13-nick.alcock@oracle.com Acked-by: Conor Dooley Reviewed-by: Conor Dooley Signed-off-by: Stephen Boyd --- drivers/clk/clk-fixed-mmio.c | 1 - drivers/clk/clk-fsl-sai.c | 1 - drivers/clk/hisilicon/clk-hi3559a.c | 1 - drivers/clk/microchip/clk-mpfs-ccc.c | 1 - 4 files changed, 4 deletions(-) diff --git a/drivers/clk/clk-fixed-mmio.c b/drivers/clk/clk-fixed-mmio.c index 5225d17d6b3f..8609fca29cc4 100644 --- a/drivers/clk/clk-fixed-mmio.c +++ b/drivers/clk/clk-fixed-mmio.c @@ -99,4 +99,3 @@ module_platform_driver(of_fixed_mmio_clk_driver); MODULE_AUTHOR("Jan Kotas "); MODULE_DESCRIPTION("Memory Mapped IO Fixed clock driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/clk-fsl-sai.c b/drivers/clk/clk-fsl-sai.c index 6238fcea0467..ee5baf993ff2 100644 --- a/drivers/clk/clk-fsl-sai.c +++ b/drivers/clk/clk-fsl-sai.c @@ -88,5 +88,4 @@ module_platform_driver(fsl_sai_clk_driver); MODULE_DESCRIPTION("Freescale SAI bitclock-as-a-clock driver"); MODULE_AUTHOR("Michael Walle "); -MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:fsl-sai-clk"); diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c index 9ea1a80acbe8..8036bd8cbb0a 100644 --- a/drivers/clk/hisilicon/clk-hi3559a.c +++ b/drivers/clk/hisilicon/clk-hi3559a.c @@ -841,5 +841,4 @@ static void __exit hi3559av100_crg_exit(void) module_exit(hi3559av100_crg_exit); -MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("HiSilicon Hi3559AV100 CRG Driver"); diff --git a/drivers/clk/microchip/clk-mpfs-ccc.c b/drivers/clk/microchip/clk-mpfs-ccc.c index 0ddc73e07be4..bce61c45e967 100644 --- a/drivers/clk/microchip/clk-mpfs-ccc.c +++ b/drivers/clk/microchip/clk-mpfs-ccc.c @@ -291,4 +291,3 @@ module_exit(clk_ccc_exit); MODULE_DESCRIPTION("Microchip PolarFire SoC Clock Conditioning Circuitry Driver"); MODULE_AUTHOR("Conor Dooley "); -MODULE_LICENSE("GPL"); -- cgit From 0ffad67784a097beccf34d297ddd1b0773b3b8a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 25 Feb 2023 21:39:47 -0800 Subject: clk: HI655X: select REGMAP instead of depending on it REGMAP is a hidden (not user visible) symbol. Users cannot set it directly thru "make *config", so drivers should select it instead of depending on it if they need it. Consistently using "select" or "depends on" can also help reduce Kconfig circular dependency issues. Therefore, change the use of "depends on REGMAP" to "select REGMAP". Fixes: 3a49afb84ca0 ("clk: enable hi655x common clk automatically") Signed-off-by: Randy Dunlap Cc: Riku Voipio Cc: Stephen Boyd Cc: Michael Turquette Cc: linux-clk@vger.kernel.org Link: https://lore.kernel.org/r/20230226053953.4681-3-rdunlap@infradead.org Signed-off-by: Stephen Boyd --- drivers/clk/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index b6c5bf69a2b2..1eef05bb1f99 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -91,7 +91,7 @@ config COMMON_CLK_RK808 config COMMON_CLK_HI655X tristate "Clock driver for Hi655x" if EXPERT depends on (MFD_HI655X_PMIC || COMPILE_TEST) - depends on REGMAP + select REGMAP default MFD_HI655X_PMIC help This driver supports the hi655x PMIC clock. This -- cgit From 26243872fe26ec0df7d81766253d00213990e382 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 22 Feb 2023 12:46:11 +0000 Subject: MAINTAINERS: add missing clock driver coverage for Microchip FPGAs When the CCC support was added, the clock binding coverage was converted to a regex in commit 71c8517e004b ("MAINTAINERS: update polarfire soc clock binding"), but the coverage for the clock drivers themselves was not updated. Rectify that now. Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230222124610.257101-1-conor.dooley@microchip.com Signed-off-by: Stephen Boyd --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f305..cfd630ce68e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17989,7 +17989,7 @@ F: Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml F: Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml F: arch/riscv/boot/dts/microchip/ F: drivers/char/hw_random/mpfs-rng.c -F: drivers/clk/microchip/clk-mpfs.c +F: drivers/clk/microchip/clk-mpfs*.c F: drivers/i2c/busses/i2c-microchip-corei2c.c F: drivers/mailbox/mailbox-mpfs.c F: drivers/pci/controller/pcie-microchip-host.c -- cgit From 633a12fda6536a1a17bcea29502e777e86a4547e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 3 Jan 2023 15:21:20 +0100 Subject: interconnect: qcom: qcm2290: Fix MASTER_SNOC_BIMC_NRT Due to what seems to be a copy-paste error, the _NRT master was identical to the _RT master, which should not be the case.. Fix it using the values available from the downstream kernel [1]. [1] https://android.googlesource.com/kernel/msm-extra/devicetree/+/refs/heads/android-msm-bramble-4.19-android11-qpr1/qcom/scuba-bus.dtsi#127 Fixes: 1a14b1ac3935 ("interconnect: qcom: Add QCM2290 driver support") Signed-off-by: Konrad Dybcio Acked-by: Shawn Guo Link: https://lore.kernel.org/r/20230103142120.15605-1-konrad.dybcio@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/qcm2290.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/interconnect/qcom/qcm2290.c b/drivers/interconnect/qcom/qcm2290.c index 0da612d6398c..a29cdb4fac03 100644 --- a/drivers/interconnect/qcom/qcm2290.c +++ b/drivers/interconnect/qcom/qcm2290.c @@ -147,9 +147,9 @@ static struct qcom_icc_node mas_snoc_bimc_nrt = { .name = "mas_snoc_bimc_nrt", .buswidth = 16, .qos.ap_owned = true, - .qos.qos_port = 2, + .qos.qos_port = 3, .qos.qos_mode = NOC_QOS_MODE_BYPASS, - .mas_rpm_id = 163, + .mas_rpm_id = 164, .slv_rpm_id = -1, .num_links = ARRAY_SIZE(mas_snoc_bimc_nrt_links), .links = mas_snoc_bimc_nrt_links, -- cgit From 9bbf5feecc7eab2c370496c1c161bbfe62084028 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 27 Feb 2023 23:23:17 +0800 Subject: dm thin: fix deadlock when swapping to thin device This is an already known issue that dm-thin volume cannot be used as swap, otherwise a deadlock may happen when dm-thin internal memory demand triggers swap I/O on the dm-thin volume itself. But thanks to commit a666e5c05e7c ("dm: fix deadlock when swapping to encrypted device"), the limit_swap_bios target flag can also be used for dm-thin to avoid the recursive I/O when it is used as swap. Fix is to simply set ti->limit_swap_bios to true in both pool_ctr() and thin_ctr(). In my test, I create a dm-thin volume /dev/vg/swap and use it as swap device. Then I run fio on another dm-thin volume /dev/vg/main and use large --blocksize to trigger swap I/O onto /dev/vg/swap. The following fio command line is used in my test, fio --name recursive-swap-io --lockmem 1 --iodepth 128 \ --ioengine libaio --filename /dev/vg/main --rw randrw \ --blocksize 1M --numjobs 32 --time_based --runtime=12h Without this fix, the whole system can be locked up within 15 seconds. With this fix, there is no any deadlock or hung task observed after 2 hours of running fio. Furthermore, if blocksize is changed from 1M to 128M, after around 30 seconds fio has no visible I/O, and the out-of-memory killer message shows up in kernel message. After around 20 minutes all fio processes are killed and the whole system is back to being alive. This is exactly what is expected when recursive I/O happens on dm-thin volume when it is used as swap. Depends-on: a666e5c05e7c ("dm: fix deadlock when swapping to encrypted device") Cc: stable@vger.kernel.org Signed-off-by: Coly Li Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6cd105c1cef3..13d4677baafd 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3369,6 +3369,7 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_bios = 1; + ti->limit_swap_bios = true; /* * Only need to enable discards if the pool should pass @@ -4249,6 +4250,7 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; ti->num_flush_bios = 1; + ti->limit_swap_bios = true; ti->flush_supported = true; ti->accounts_remapped_io = true; ti->per_io_data_size = sizeof(struct dm_thin_endio_hook); -- cgit From fb294b1c0ba982144ca467a75e7d01ff26304e2b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 6 Mar 2023 11:17:58 -0500 Subject: dm crypt: add cond_resched() to dmcrypt_write() The loop in dmcrypt_write may be running for unbounded amount of time, thus we need cond_resched() in it. This commit fixes the following warning: [ 3391.153255][ C12] watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [dmcrypt_write/2:2897] ... [ 3391.387210][ C12] Call trace: [ 3391.390338][ C12] blk_attempt_bio_merge.part.6+0x38/0x158 [ 3391.395970][ C12] blk_attempt_plug_merge+0xc0/0x1b0 [ 3391.401085][ C12] blk_mq_submit_bio+0x398/0x550 [ 3391.405856][ C12] submit_bio_noacct+0x308/0x380 [ 3391.410630][ C12] dmcrypt_write+0x1e4/0x208 [dm_crypt] [ 3391.416005][ C12] kthread+0x130/0x138 [ 3391.419911][ C12] ret_from_fork+0x10/0x18 Reported-by: yangerkun Fixes: dc2676210c42 ("dm crypt: offload writes to thread") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 87c5706131f2..faba1be572f9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1937,6 +1937,7 @@ pop_from_list: io = crypt_io_from_node(rb_first(&write_tree)); rb_erase(&io->rb_node, &write_tree); kcryptd_io_write(io); + cond_resched(); } while (!RB_EMPTY_ROOT(&write_tree)); blk_finish_plug(&plug); } -- cgit From 89dc65a7cc8a119c395c0931b12d7a514f9d2bcc Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 28 Feb 2023 19:26:55 -0500 Subject: clk: k210: remove an implicit 64-bit division The K210 clock driver depends on SOC_CANAAN, which is only selectable when !MMU on RISC-V. !MMU is not possible on 32-bit yet, but patches have been sent for its enabling. The kernel test robot reported this implicit 64-bit division there. Replace the implicit division with an explicit one. Reported-by: kernel test robot Link: https://lore.kernel.org/linux-riscv/202301201538.zNlqgE4L-lkp@intel.com/ Signed-off-by: Conor Dooley Signed-off-by: Jesse Taube Link: https://lore.kernel.org/r/20230301002657.352637-2-Mr.Bossman075@gmail.com Reviewed-by: Damien Le Moal Signed-off-by: Stephen Boyd --- drivers/clk/clk-k210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-k210.c b/drivers/clk/clk-k210.c index 67a7cb3503c3..4eed667eddaf 100644 --- a/drivers/clk/clk-k210.c +++ b/drivers/clk/clk-k210.c @@ -495,7 +495,7 @@ static unsigned long k210_pll_get_rate(struct clk_hw *hw, f = FIELD_GET(K210_PLL_CLKF, reg) + 1; od = FIELD_GET(K210_PLL_CLKOD, reg) + 1; - return (u64)parent_rate * f / (r * od); + return div_u64((u64)parent_rate * f, r * od); } static const struct clk_ops k210_pll_ops = { -- cgit From 672a58fc7c477e59981653a11241566870fff852 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 24 Feb 2023 13:30:45 +0530 Subject: arm64: dts: qcom: sm8150: Fix the iommu mask used for PCIe controllers The iommu mask should be 0x3f as per Qualcomm internal documentation. Without the correct mask, the PCIe transactions from the endpoint will result in SMMU faults. Hence, fix it! Cc: stable@vger.kernel.org # 5.19 Fixes: a1c86c680533 ("arm64: dts: qcom: sm8150: Add PCIe nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Konrad Dybcio Reviewed-by: Bhupesh Sharma Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230224080045.6577-1-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8150.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index fd20096cfc6e..13e0ce828606 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -1826,7 +1826,7 @@ "slave_q2a", "tbu"; - iommus = <&apps_smmu 0x1d80 0x7f>; + iommus = <&apps_smmu 0x1d80 0x3f>; iommu-map = <0x0 &apps_smmu 0x1d80 0x1>, <0x100 &apps_smmu 0x1d81 0x1>; @@ -1925,7 +1925,7 @@ assigned-clocks = <&gcc GCC_PCIE_1_AUX_CLK>; assigned-clock-rates = <19200000>; - iommus = <&apps_smmu 0x1e00 0x7f>; + iommus = <&apps_smmu 0x1e00 0x3f>; iommu-map = <0x0 &apps_smmu 0x1e00 0x1>, <0x100 &apps_smmu 0x1e01 0x1>; -- cgit From 8013295662f55696e5953ef14c31ba03721adf8f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sat, 11 Feb 2023 10:54:15 +0530 Subject: arm64: dts: qcom: sc8280xp: Add label property to vadc channel nodes For uniquely identifying the vadc channels, label property has to be used. The initial commit adding vadc support assumed that the driver will use the unit address along with the node name to identify the channels. But this assumption is now broken by, commit 701c875aded8 ("iio: adc: qcom-spmi-adc5: Fix the channel name") that stripped unit address from channel names. This results in probe failure of the vadc driver: [ 8.380370] iio iio:device0: tried to double register : in_temp_pmic-die-temp_input [ 8.380383] qcom-spmi-adc5 c440000.spmi:pmic@0:adc@3100: Failed to register sysfs interfaces [ 8.380386] qcom-spmi-adc5: probe of c440000.spmi:pmic@0:adc@3100 failed with error -16 Hence, let's get rid of the assumption about drivers and rely on label property to uniquely identify the channels. The labels are derived from the schematics for each PMIC. For internal adc channels such as die and xo, the PMIC names are used as a prefix. Fixes: 7c0151347401 ("arm64: dts: qcom: sc8280xp-x13s: Add PM8280_{1/2} ADC_TM5 channels") Fixes: 9d41cd17394a ("arm64: dts: qcom: sc8280xp-x13s: Add PMR735A VADC channel") Fixes: 3375151a7185 ("arm64: dts: qcom: sc8280xp-x13s: Add PM8280_{1/2} VADC channels") Fixes: 9a6b3042c533 ("arm64: dts: qcom: sc8280xp-x13s: Add PMK8280 VADC channels") Reported-by: Steev Klimaszewski Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230211052415.14581-1-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 98e71b933437..96b36ce94ce0 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -772,75 +772,88 @@ pmic-die-temp@3 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmk8350_die_temp"; }; xo-therm@44 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "pmk8350_xo_therm"; }; pmic-die-temp@103 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmc8280_1_die_temp"; }; sys-therm@144 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm1"; }; sys-therm@145 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm2"; }; sys-therm@146 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm3"; }; sys-therm@147 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm4"; }; pmic-die-temp@303 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmc8280_2_die_temp"; }; sys-therm@344 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm5"; }; sys-therm@345 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm6"; }; sys-therm@346 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm7"; }; sys-therm@347 { reg = ; qcom,hw-settle-time = <200>; qcom,ratiometric; + label = "sys_therm8"; }; pmic-die-temp@403 { reg = ; qcom,pre-scaling = <1 1>; + label = "pmr735a_die_temp"; }; }; -- cgit From 205c91fb6aca5f8bad5346181575a7ef78e43cea Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 16 Feb 2023 13:49:21 +0100 Subject: arm64: dts: qcom: sm6115: Un-enable SPI5 by default The commit mentioned in the fixes tag erroneously enabled SPI5 unconditionally. Undo it. Fixes: 25aab0b852d6 ("arm64: dts: qcom: sm6115: Add geni debug uart node for qup0") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230216124921.3985834-1-konrad.dybcio@linaro.org --- arch/arm64/boot/dts/qcom/sm6115.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index 4d6ec815b78b..fbd67d2c8d78 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -1078,6 +1078,7 @@ dma-names = "tx", "rx"; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; }; -- cgit From 11d5e41f5e129e39bddedc7244a0946a802d2e8e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 9 Jan 2023 14:56:47 +0100 Subject: arm64: dts: qcom: sm6375: Add missing power-domain-named to CDSP This was omitted when first introducing the node. Fix it. Fixes: fe6fd26aeddf ("arm64: dts: qcom: sm6375: Add ADSP&CDSP") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230109135647.339224-5-konrad.dybcio@linaro.org --- arch/arm64/boot/dts/qcom/sm6375.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi index 31b88c738510..068ee4f72485 100644 --- a/arch/arm64/boot/dts/qcom/sm6375.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi @@ -1209,6 +1209,7 @@ clock-names = "xo"; power-domains = <&rpmpd SM6375_VDDCX>; + power-domain-names = "cx"; memory-region = <&pil_cdsp_mem>; -- cgit From 4059297ed0a5adf8e5fd0bd734d702a24202c02e Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 9 Feb 2023 09:45:10 +0200 Subject: arm64: dts: qcom: sm8550: Add bias pull up value to tlmm i2c data clk states The default bias pull up value for the tlmm i2c data clk states is 2.2kOhms. Add this value to make sure the driver factors in the i2c pull up bit when writing the config register. Signed-off-by: Abel Vesa Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230209074510.4153294-2-abel.vesa@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index ff4d342c0725..aa84a36c394a 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -2691,7 +2691,7 @@ pins = "gpio28", "gpio29"; function = "qup1_se0"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c1_data_clk: qup-i2c1-data-clk-state { @@ -2699,7 +2699,7 @@ pins = "gpio32", "gpio33"; function = "qup1_se1"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c2_data_clk: qup-i2c2-data-clk-state { @@ -2707,7 +2707,7 @@ pins = "gpio36", "gpio37"; function = "qup1_se2"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c3_data_clk: qup-i2c3-data-clk-state { @@ -2715,7 +2715,7 @@ pins = "gpio40", "gpio41"; function = "qup1_se3"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c4_data_clk: qup-i2c4-data-clk-state { @@ -2723,7 +2723,7 @@ pins = "gpio44", "gpio45"; function = "qup1_se4"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c5_data_clk: qup-i2c5-data-clk-state { @@ -2731,7 +2731,7 @@ pins = "gpio52", "gpio53"; function = "qup1_se5"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c6_data_clk: qup-i2c6-data-clk-state { @@ -2739,7 +2739,7 @@ pins = "gpio48", "gpio49"; function = "qup1_se6"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c8_data_clk: qup-i2c8-data-clk-state { @@ -2747,14 +2747,14 @@ pins = "gpio57"; function = "qup2_se0_l1_mira"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; sda-pins { pins = "gpio56"; function = "qup2_se0_l0_mira"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; }; @@ -2763,7 +2763,7 @@ pins = "gpio60", "gpio61"; function = "qup2_se1"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c10_data_clk: qup-i2c10-data-clk-state { @@ -2771,7 +2771,7 @@ pins = "gpio64", "gpio65"; function = "qup2_se2"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c11_data_clk: qup-i2c11-data-clk-state { @@ -2779,7 +2779,7 @@ pins = "gpio68", "gpio69"; function = "qup2_se3"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c12_data_clk: qup-i2c12-data-clk-state { @@ -2787,7 +2787,7 @@ pins = "gpio2", "gpio3"; function = "qup2_se4"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c13_data_clk: qup-i2c13-data-clk-state { @@ -2795,7 +2795,7 @@ pins = "gpio80", "gpio81"; function = "qup2_se5"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_i2c15_data_clk: qup-i2c15-data-clk-state { @@ -2803,7 +2803,7 @@ pins = "gpio72", "gpio106"; function = "qup2_se7"; drive-strength = <2>; - bias-pull-up; + bias-pull-up = <2200>; }; qup_spi0_cs: qup-spi0-cs-state { -- cgit From 27072f2ffb29283b9a44d878204c86c08d86b37f Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 16 Feb 2023 12:08:03 +0100 Subject: arm64: dts: qcom: sm8550: Use correct CPU compatibles Use the correct compatibles for the four kinds of CPU cores used on SM8550, based on the value of their MIDR_EL1 registers: CPU7: 0x411fd4e0 - CX3 r1p1 CPU5-6: 0x412fd470 - CA710 r?p? CPU3-4: 0x411fd4d0 - CA715 r?p? CPU0-2: 0x411fd461 - CA510 r?p? Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230216110803.3945747-2-konrad.dybcio@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index aa84a36c394a..25f51245fe9b 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -66,7 +66,7 @@ CPU0: cpu@0 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a510"; reg = <0 0>; enable-method = "psci"; next-level-cache = <&L2_0>; @@ -89,7 +89,7 @@ CPU1: cpu@100 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a510"; reg = <0 0x100>; enable-method = "psci"; next-level-cache = <&L2_100>; @@ -108,7 +108,7 @@ CPU2: cpu@200 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a510"; reg = <0 0x200>; enable-method = "psci"; next-level-cache = <&L2_200>; @@ -127,7 +127,7 @@ CPU3: cpu@300 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a715"; reg = <0 0x300>; enable-method = "psci"; next-level-cache = <&L2_300>; @@ -146,7 +146,7 @@ CPU4: cpu@400 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a715"; reg = <0 0x400>; enable-method = "psci"; next-level-cache = <&L2_400>; @@ -165,7 +165,7 @@ CPU5: cpu@500 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a710"; reg = <0 0x500>; enable-method = "psci"; next-level-cache = <&L2_500>; @@ -184,7 +184,7 @@ CPU6: cpu@600 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-a710"; reg = <0 0x600>; enable-method = "psci"; next-level-cache = <&L2_600>; @@ -203,7 +203,7 @@ CPU7: cpu@700 { device_type = "cpu"; - compatible = "qcom,kryo"; + compatible = "arm,cortex-x3"; reg = <0 0x700>; enable-method = "psci"; next-level-cache = <&L2_700>; -- cgit From 052750a4444577e6067bdf73dd5ff92876f59ef6 Mon Sep 17 00:00:00 2001 From: Jianhua Lu Date: Tue, 21 Feb 2023 20:36:33 +0800 Subject: arm64: dts: qcom: sm8250-xiaomi-elish: Correct venus firmware path Missing vendor name for venus firmware path. Add it. Fixes: a41b617530bf ("arm64: dts: qcom: sm8250: Add device tree for Xiaomi Mi Pad 5 Pro") Signed-off-by: Jianhua Lu Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230221123633.25145-1-lujianhua000@gmail.com --- arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts index acaa99c5ff8b..a85d47f7a9e8 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish.dts @@ -625,6 +625,6 @@ }; &venus { - firmware-name = "qcom/sm8250/elish/venus.mbn"; + firmware-name = "qcom/sm8250/xiaomi/elish/venus.mbn"; status = "okay"; }; -- cgit From 780f6a9afe8b0e303406a39f6968cf1daa6c3d51 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Jan 2023 13:20:52 -0800 Subject: lib: zstd: Fix -Wstringop-overflow warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following -Wstringop-overflow warning when building with GCC 11+: lib/zstd/decompress/huf_decompress.c: In function ‘HUF_readDTableX2_wksp’: lib/zstd/decompress/huf_decompress.c:700:5: warning: ‘HUF_fillDTableX2.constprop’ accessing 624 bytes in a region of size 52 [-Wstringop-overflow=] 700 | HUF_fillDTableX2(dt, maxTableLog, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 701 | wksp->sortedSymbol, sizeOfSort, | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 702 | wksp->rankStart0, wksp->rankVal, maxW, | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 703 | tableLog+1, | ~~~~~~~~~~~ 704 | wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32)); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lib/zstd/decompress/huf_decompress.c:700:5: note: referencing argument 6 of type ‘U32 (*)[13]’ {aka ‘unsigned int (*)[13]’} lib/zstd/decompress/huf_decompress.c:571:13: note: in a call to function ‘HUF_fillDTableX2.constprop’ 571 | static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, | ^~~~~~~~~~~~~~~~ by using pointer notation instead of array notation. This is one of the last remaining warnings to be fixed before globally enabling -Wstringop-overflow. Co-developed-by: Gustavo A. R. Silva Signed-off-by: Gustavo A. R. Silva Cc: Nick Terrell Signed-off-by: Kees Cook Signed-off-by: Nick Terrell --- lib/zstd/decompress/huf_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c index 89b269a641c7..60958afebc41 100644 --- a/lib/zstd/decompress/huf_decompress.c +++ b/lib/zstd/decompress/huf_decompress.c @@ -985,7 +985,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, const sortedSymbol_t* sortedList, - const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) { U32* const rankVal = rankValOrigin[0]; -- cgit From 038505c41f0aad26ef101f4f7f6e111531c3914f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 15 Feb 2023 15:19:17 -0800 Subject: lib: zstd: Backport fix for in-place decompression Backport the relevant part of upstream commit 5b266196 [0]. This fixes in-place decompression for x86-64 kernel decompression. It uses a bound of 131072 + (uncompressed_size >> 8), which can be violated after upstream commit 6a7ede3d [1], as zstd can use part of the output buffer as temporary storage, and without this patch needs a bound of ~262144. The fix is for zstd to detect that the input and output buffers overlap, so that zstd knows it can't use the overlapping portion of the output buffer as tempoary storage. If the margin is not large enough, this will ensure that zstd will fail the decompression, rather than overwriting part of the input data, and causing corruption. This fix has been landed upstream and is in release v1.5.4. That commit also adds unit and fuzz tests to verify that the margin we use is respected, and correct. That means that the fix is well tested upstream. I have not been able to reproduce the potential bug in x86-64 kernel decompression locally, nor have I recieved reports of failures to decompress the kernel. It is possible that compression saves enough space to make it very hard for the issue to appear. I've boot tested the zstd compressed kernel on x86-64 and i386 with this patch, which uses in-place decompression, and sanity tested zstd compression in btrfs / squashfs to make sure that we don't see any issues, but other uses of zstd shouldn't be affected, because they don't use in-place decompression. Thanks to Vasily Gorbik for debugging a related issue on s390, which was triggered by the same commit, but was a bug in how __decompress() was called [2]. And to Sasha Levin for the CC alerting me of the issue. [0] https://github.com/facebook/zstd/commit/5b266196a41e6a15e21bd4f0eeab43b938db1d90 [1] https://github.com/facebook/zstd/commit/6a7ede3dfccbf3e0a5928b4224a039c260dcff72 [2] https://lore.kernel.org/r/patch-1.thread-41c676.git-41c676c2d153.your-ad-here.call-01675030179-ext-9637@work.hours CC: Vasily Gorbik CC: Heiko Carstens CC: Sasha Levin CC: Yann Collet Signed-off-by: Nick Terrell --- lib/zstd/decompress/zstd_decompress.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c index b9b935a9f5c0..6b3177c94711 100644 --- a/lib/zstd/decompress/zstd_decompress.c +++ b/lib/zstd/decompress/zstd_decompress.c @@ -798,7 +798,7 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, if (srcSize == 0) return 0; RETURN_ERROR(dstBuffer_null, ""); } - ZSTD_memcpy(dst, src, srcSize); + ZSTD_memmove(dst, src, srcSize); return srcSize; } @@ -858,6 +858,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Loop on each block */ while (1) { + BYTE* oBlockEnd = oend; size_t decodedSize; blockProperties_t blockProperties; size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); @@ -867,16 +868,34 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, remainingSrcSize -= ZSTD_blockHeaderSize; RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + if (ip >= op && ip < oBlockEnd) { + /* We are decompressing in-place. Limit the output pointer so that we + * don't overwrite the block that we are currently reading. This will + * fail decompression if the input & output pointers aren't spaced + * far enough apart. + * + * This is important to set, even when the pointers are far enough + * apart, because ZSTD_decompressBlock_internal() can decide to store + * literals in the output buffer, after the block it is decompressing. + * Since we don't want anything to overwrite our input, we have to tell + * ZSTD_decompressBlock_internal to never write past ip. + * + * See ZSTD_allocateLiteralsBuffer() for reference. + */ + oBlockEnd = op + (ip - op); + } + switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); break; case bt_raw : + /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */ decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); break; case bt_rle : - decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); + decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize); break; case bt_reserved : default: -- cgit From 6906598f1ce93761716d780b6e3f171e13f0f4ce Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Sun, 29 Jan 2023 14:14:36 +0100 Subject: zstd: Fix definition of assert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit assert(x) should emit a warning if x is false. WARN_ON(x) emits a warning if x is true. Thus, assert(x) should be defined as WARN_ON(!x) rather than WARN_ON(x). Signed-off-by: Jonathan Neuschäfer Signed-off-by: Nick Terrell --- lib/zstd/common/zstd_deps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h index 7a5bf44839c9..f06df065dec0 100644 --- a/lib/zstd/common/zstd_deps.h +++ b/lib/zstd/common/zstd_deps.h @@ -84,7 +84,7 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) { #include -#define assert(x) WARN_ON((x)) +#define assert(x) WARN_ON(!(x)) #endif /* ZSTD_DEPS_ASSERT */ #endif /* ZSTD_DEPS_NEED_ASSERT */ -- cgit From 2da789cda462bda93679f53ee38f9aa2309d47e8 Mon Sep 17 00:00:00 2001 From: Guillaume Tucker Date: Sat, 4 Feb 2023 14:34:54 +0100 Subject: selftests: amd-pstate: fix TEST_FILES Bring back the Python scripts that were initially added with TEST_GEN_FILES but now with TEST_FILES to avoid having them deleted when doing a clean. Also fix the way the architecture is being determined as they should also be installed when ARCH=x86_64 is provided explicitly. Then also append extra files to TEST_FILES and TEST_PROGS with += so they don't get discarded. Fixes: ba2d788aa873 ("selftests: amd-pstate: Trigger tbench benchmark and test cpus") Fixes: a49fb7218ed8 ("selftests: amd-pstate: Don't delete source files via Makefile") Signed-off-by: Guillaume Tucker Acked-by: Huang Rui Signed-off-by: Shuah Khan --- tools/testing/selftests/amd-pstate/Makefile | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile index 5fd1424db37d..c382f579fe94 100644 --- a/tools/testing/selftests/amd-pstate/Makefile +++ b/tools/testing/selftests/amd-pstate/Makefile @@ -4,10 +4,15 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +ARCH ?= $(shell uname -m 2>/dev/null || echo not) +ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -TEST_PROGS := run.sh -TEST_FILES := basic.sh tbench.sh gitsource.sh +ifeq (x86,$(ARCH)) +TEST_FILES += ../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py +TEST_FILES += ../../../power/x86/intel_pstate_tracer/intel_pstate_tracer.py +endif + +TEST_PROGS += run.sh +TEST_FILES += basic.sh tbench.sh gitsource.sh include ../lib.mk -- cgit From ced0f245ed951e2b8bd68f79c15238d7dd253662 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Mar 2023 11:14:50 +0100 Subject: kallsyms: add kallsyms_seqs_of_names to list of special symbols My randconfig build setup ran into another kallsyms warning: Inconsistent kallsyms data Try make KALLSYMS_EXTRA_PASS=1 as a workaround After adding some debugging code to kallsyms.c, I saw that the recently added kallsyms_seqs_of_names symbol can sometimes cause the second stage table to be slightly longer than the first stage, which makes the build inconsistent. Add it to the exception table that contains all other kallsyms-generated symbols. Fixes: 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()") Signed-off-by: Arnd Bergmann Reviewed-by: Zhen Lei Signed-off-by: Masahiro Yamada --- scripts/kallsyms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 8a68179a98a3..a239a87e7bec 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -119,6 +119,7 @@ static bool is_ignored_symbol(const char *name, char type) "kallsyms_markers", "kallsyms_token_table", "kallsyms_token_index", + "kallsyms_seqs_of_names", /* Exclude linker generated symbols which vary between passes */ "_SDA_BASE_", /* ppc */ "_SDA2_BASE_", /* ppc */ -- cgit From 77bf4b3ed42e31d29b255fcd6530fb7a1e217e89 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 6 Mar 2023 15:55:27 +0200 Subject: soc: qcom: llcc: Fix slice configuration values for SC8280XP The slice IDs for CVPFW, CPUSS1 and CPUWHT currently overflow the 32bit LLCC config registers, which means it is writing beyond the upper limit of the ATTR0_CFGn and ATTR1_CFGn range of registers. But the most obvious impact is the fact that the mentioned slices do not get configured at all, which will result in reduced performance. Fix that by using the slice ID values taken from the latest LLCC SC table. Fixes: ec69dfbdc426 ("soc: qcom: llcc: Add sc8180x and sc8280xp configurations") Cc: stable@vger.kernel.org # 5.19+ Signed-off-by: Abel Vesa Tested-by: Juerg Haefliger Reviewed-by: Sai Prakash Ranjan Acked-by: Konrad Dybcio Reviewed-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230306135527.509796-1-abel.vesa@linaro.org --- drivers/soc/qcom/llcc-qcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c index 23ce2f78c4ed..26efe12012a0 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -191,9 +191,9 @@ static const struct llcc_slice_config sc8280xp_data[] = { { LLCC_CVP, 28, 512, 3, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, { LLCC_APTCM, 30, 1024, 3, 1, 0x0, 0x1, 1, 0, 0, 1, 0, 0 }, { LLCC_WRCACHE, 31, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, - { LLCC_CVPFW, 32, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, - { LLCC_CPUSS1, 33, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, - { LLCC_CPUHWT, 36, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, + { LLCC_CVPFW, 17, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, + { LLCC_CPUSS1, 3, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 }, + { LLCC_CPUHWT, 5, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 }, }; static const struct llcc_slice_config sdm845_data[] = { -- cgit From 947007419b60d5e06aa54b0f411c123db7f45a44 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 5 Mar 2023 11:32:33 +0100 Subject: soc: qcom: rmtfs: fix error handling reading qcom,vmid of_property_count_u32_elems returns a negative integer when an error happens , but since the value was assigned to an unsigned integer, the check never worked correctly. Also print the correct variable in the error print, ret isn't used here. Fixes: e656cd0bcf3d ("soc: qcom: rmtfs: Optionally map RMTFS to more VMs") Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230305-rmtfs-vmid-fix-v1-1-6a7206081602@z3ntu.xyz --- drivers/soc/qcom/rmtfs_mem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c index 2d3ee22b9249..f57756220198 100644 --- a/drivers/soc/qcom/rmtfs_mem.c +++ b/drivers/soc/qcom/rmtfs_mem.c @@ -176,7 +176,8 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev) struct reserved_mem *rmem; struct qcom_rmtfs_mem *rmtfs_mem; u32 client_id; - u32 num_vmids, vmid[NUM_MAX_VMIDS]; + u32 vmid[NUM_MAX_VMIDS]; + int num_vmids; int ret, i; rmem = of_reserved_mem_lookup(node); @@ -229,7 +230,7 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev) num_vmids = of_property_count_u32_elems(node, "qcom,vmid"); if (num_vmids < 0) { - dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", ret); + dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", num_vmids); goto remove_cdev; } else if (num_vmids > NUM_MAX_VMIDS) { dev_warn(&pdev->dev, -- cgit From 749d56bd5cf311dd9b50cfc092d7a39309454077 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Sun, 5 Mar 2023 11:32:34 +0100 Subject: soc: qcom: rmtfs: handle optional qcom,vmid correctly Older platforms don't have qcom,vmid set, handle -EINVAL return value correctly. And since num_vmids is passed to of_property_read_u32_array later we should make sure it has a sane value before continuing. Fixes: e656cd0bcf3d ("soc: qcom: rmtfs: Optionally map RMTFS to more VMs") Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230305-rmtfs-vmid-fix-v1-2-6a7206081602@z3ntu.xyz --- drivers/soc/qcom/rmtfs_mem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c index f57756220198..538fa182169a 100644 --- a/drivers/soc/qcom/rmtfs_mem.c +++ b/drivers/soc/qcom/rmtfs_mem.c @@ -229,7 +229,10 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev) } num_vmids = of_property_count_u32_elems(node, "qcom,vmid"); - if (num_vmids < 0) { + if (num_vmids == -EINVAL) { + /* qcom,vmid is optional */ + num_vmids = 0; + } else if (num_vmids < 0) { dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", num_vmids); goto remove_cdev; } else if (num_vmids > NUM_MAX_VMIDS) { -- cgit From eaba416688f4f074ea3bf2ef975c9e2dbb06712b Mon Sep 17 00:00:00 2001 From: Yang Xiwen Date: Wed, 1 Mar 2023 16:53:50 +0800 Subject: arm64: dts: qcom: msm8916-ufi: Fix sim card selection pinctrl The previous commit mistakenly introduced sim_ctrl_default as pinctrl, this is incorrect, the interface for sim card selection varies between different devices and should not be placed in the dtsi. This commit selects external SIM card slot for ufi001c as default. uf896 selects the correct SIM card slot automatically, thus does not need this pinctrl node. Fixes: faf69431464b ("arm64: dts: qcom: msm8916-thwc: Add initial device trees") Signed-off-by: Yang Xiwen Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/tencent_7036BCA256055D05F8C49D86DF7F0E2D1A05@qq.com --- arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts | 4 ---- arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts | 28 +++++++++++++++++++++-- arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi | 10 -------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts b/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts index c492db856190..82e260375174 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-thwc-uf896.dts @@ -33,7 +33,3 @@ &gpio_leds_default { pins = "gpio81", "gpio82", "gpio83"; }; - -&sim_ctrl_default { - pins = "gpio1", "gpio2"; -}; diff --git a/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts b/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts index 700cf81cbf8c..8433c9710b1c 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-thwc-ufi001c.dts @@ -25,6 +25,11 @@ gpios = <&msmgpio 20 GPIO_ACTIVE_HIGH>; }; +&mpss { + pinctrl-0 = <&sim_ctrl_default>; + pinctrl-names = "default"; +}; + &button_default { pins = "gpio37"; bias-pull-down; @@ -34,6 +39,25 @@ pins = "gpio20", "gpio21", "gpio22"; }; -&sim_ctrl_default { - pins = "gpio1", "gpio2"; +/* This selects the external SIM card slot by default */ +&msmgpio { + sim_ctrl_default: sim-ctrl-default-state { + esim-sel-pins { + pins = "gpio0", "gpio3"; + bias-disable; + output-low; + }; + + sim-en-pins { + pins = "gpio1"; + bias-disable; + output-low; + }; + + sim-sel-pins { + pins = "gpio2"; + bias-disable; + output-high; + }; + }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi b/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi index 790a9696da9d..cdf34b74fa8f 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916-ufi.dtsi @@ -92,9 +92,6 @@ }; &mpss { - pinctrl-0 = <&sim_ctrl_default>; - pinctrl-names = "default"; - status = "okay"; }; @@ -240,11 +237,4 @@ drive-strength = <2>; bias-disable; }; - - sim_ctrl_default: sim-ctrl-default-state { - function = "gpio"; - drive-strength = <2>; - bias-disable; - output-low; - }; }; -- cgit From 8a63441e83724fee1ef3fd37b237d40d90780766 Mon Sep 17 00:00:00 2001 From: Krishna chaitanya chundru Date: Tue, 28 Feb 2023 17:19:12 +0530 Subject: arm64: dts: qcom: sc7280: Mark PCIe controller as cache coherent If the controller is not marked as cache coherent, then kernel will try to ensure coherency during dma-ops and that may cause data corruption. So, mark the PCIe node as dma-coherent as the devices on PCIe bus are cache coherent. Cc: stable@vger.kernel.org Fixes: 92e0ee9f83b3 ("arm64: dts: qcom: sc7280: Add PCIe and PHY related node") Signed-off-by: Krishna chaitanya chundru Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/1677584952-17496-1-git-send-email-quic_krichai@quicinc.com --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index bdcb74925313..8f4ab6bd2886 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2131,6 +2131,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pcie1_clkreq_n>; + dma-coherent; + iommus = <&apps_smmu 0x1c80 0x1>; iommu-map = <0x0 &apps_smmu 0x1c80 0x1>, -- cgit From 6b0313c2fa3d2cf991c9ffef6fae6e7ef592ce6d Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 4 Mar 2023 15:41:07 +0800 Subject: cpuidle: psci: Iterate backwards over list in psci_pd_remove() In case that psci_pd_init_topology() fails for some reason, psci_pd_remove() will be responsible for deleting provider and removing genpd from psci_pd_providers list. There will be a failure when removing the cluster PD, because the cpu (child) PDs haven't been removed. [ 0.050232] CPUidle PSCI: init PM domain cpu0 [ 0.050278] CPUidle PSCI: init PM domain cpu1 [ 0.050329] CPUidle PSCI: init PM domain cpu2 [ 0.050370] CPUidle PSCI: init PM domain cpu3 [ 0.050422] CPUidle PSCI: init PM domain cpu-cluster0 [ 0.050475] PM: genpd_remove: unable to remove cpu-cluster0 [ 0.051412] PM: genpd_remove: removed cpu3 [ 0.051449] PM: genpd_remove: removed cpu2 [ 0.051499] PM: genpd_remove: removed cpu1 [ 0.051546] PM: genpd_remove: removed cpu0 Fix the problem by iterating the provider list reversely, so that parent PD gets removed after child's PDs like below. [ 0.029052] CPUidle PSCI: init PM domain cpu0 [ 0.029076] CPUidle PSCI: init PM domain cpu1 [ 0.029103] CPUidle PSCI: init PM domain cpu2 [ 0.029124] CPUidle PSCI: init PM domain cpu3 [ 0.029151] CPUidle PSCI: init PM domain cpu-cluster0 [ 0.029647] PM: genpd_remove: removed cpu0 [ 0.029666] PM: genpd_remove: removed cpu1 [ 0.029690] PM: genpd_remove: removed cpu2 [ 0.029714] PM: genpd_remove: removed cpu3 [ 0.029738] PM: genpd_remove: removed cpu-cluster0 Fixes: a65a397f2451 ("cpuidle: psci: Add support for PM domains by using genpd") Reviewed-by: Sudeep Holla Reviewed-by: Ulf Hansson Signed-off-by: Shawn Guo Cc: 5.10+ # 5.10+ Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-psci-domain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 6ad2954948a5..11316c3b14ca 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -106,7 +106,8 @@ static void psci_pd_remove(void) struct psci_pd_provider *pd_provider, *it; struct generic_pm_domain *genpd; - list_for_each_entry_safe(pd_provider, it, &psci_pd_providers, link) { + list_for_each_entry_safe_reverse(pd_provider, it, + &psci_pd_providers, link) { of_genpd_del_provider(pd_provider->node); genpd = of_genpd_remove_last(pd_provider->node); -- cgit From c29b97725c91add5021a3257c14ad1ed32eedf76 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 2 Mar 2023 18:12:12 +0200 Subject: ACPI: docs: enumeration: Correct reference to the I²C device data type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I²C peripheral devices that are connected to the controller are represented in the Linux kernel as objects of the struct i2c_client. Fix this in the documentation. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/enumeration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst index b9dc0c603f36..56d9913a3370 100644 --- a/Documentation/firmware-guide/acpi/enumeration.rst +++ b/Documentation/firmware-guide/acpi/enumeration.rst @@ -19,7 +19,7 @@ possible we decided to do following: platform devices. - Devices behind real busses where there is a connector resource - are represented as struct spi_device or struct i2c_device. Note + are represented as struct spi_device or struct i2c_client. Note that standard UARTs are not busses so there is no struct uart_device, although some of them may be represented by struct serdev_device. -- cgit From 89b0411481967a2e8c91190a211a359966cfcf4b Mon Sep 17 00:00:00 2001 From: "Chia-Lin Kao (AceLan)" Date: Thu, 2 Mar 2023 17:33:00 +0800 Subject: ACPI: video: Add backlight=native DMI quirk for Dell Vostro 15 3535 Sometimes the system boots up with a acpi_video0 backlight interface which doesn't work. So add Dell Vostro 15 3535 into the video_detect_dmi_table to set it to native explicitly. Signed-off-by: Chia-Lin Kao (AceLan) Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 710ac640267d..14d6d81e536f 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -716,6 +716,13 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5515"), }, }, + { + .callback = video_detect_force_native, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 15 3535"), + }, + }, /* * Desktops which falsely report a backlight and which our heuristics -- cgit From 5adc409340b1fc82bc1175e602d14ac82ac685e3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2023 11:04:34 +0100 Subject: ACPI: x86: Introduce an acpi_quirk_skip_gpio_event_handlers() helper x86 ACPI boards which ship with only Android as their factory image usually have pretty broken ACPI tables, relying on everything being hardcoded in the factory kernel image and often disabling parts of the ACPI enumeration kernel code to avoid the broken tables causing issues. Part of this broken ACPI code is that sometimes these boards have _AEI ACPI GPIO event handlers which are broken. So far this has been dealt with in the platform/x86/x86-android-tablets.c module, which contains various workarounds for these devices, by it calling acpi_gpiochip_free_interrupts() on gpiochip-s with troublesome handlers to disable the handlers. But in some cases this is too late, if the handlers are of the edge type then gpiolib-acpi.c's code will already have run them at boot. This can cause issues such as GPIOs ending up as owned by "ACPI:OpRegion", making them unavailable for drivers which actually need them. Boards with these broken ACPI tables are already listed in drivers/acpi/x86/utils.c for e.g. acpi_quirk_skip_i2c_client_enumeration(). Extend the quirks mechanism for a new acpi_quirk_skip_gpio_event_handlers() helper, this re-uses the DMI-ids rather then having to duplicate the same DMI table in gpiolib-acpi.c . Also add the new ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS quirk to existing boards with troublesome ACPI gpio event handlers, so that the current acpi_gpiochip_free_interrupts() hack can be removed from x86-android-tablets.c . Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 24 +++++++++++++++++++++--- drivers/gpio/gpiolib-acpi.c | 3 +++ include/acpi/acpi_bus.h | 5 +++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index e45285d4e62a..4bf57cce30bb 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -251,6 +251,7 @@ bool force_storage_d3(void) #define ACPI_QUIRK_UART1_TTY_UART2_SKIP BIT(1) #define ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY BIT(2) #define ACPI_QUIRK_USE_ACPI_AC_AND_BATTERY BIT(3) +#define ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS BIT(4) static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { /* @@ -286,7 +287,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { }, .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | ACPI_QUIRK_UART1_TTY_UART2_SKIP | - ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | + ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), }, { .matches = { @@ -294,7 +296,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"), }, .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | - ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | + ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), }, { /* Lenovo Yoga Tablet 2 1050F/L */ @@ -336,7 +339,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"), }, .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | - ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY), + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | + ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), }, { /* Whitelabel (sold as various brands) TM800A550L */ @@ -413,6 +417,20 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s return 0; } EXPORT_SYMBOL_GPL(acpi_quirk_skip_serdev_enumeration); + +bool acpi_quirk_skip_gpio_event_handlers(void) +{ + const struct dmi_system_id *dmi_id; + long quirks; + + dmi_id = dmi_first_match(acpi_quirk_skip_dmi_ids); + if (!dmi_id) + return false; + + quirks = (unsigned long)dmi_id->driver_data; + return (quirks & ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS); +} +EXPORT_SYMBOL_GPL(acpi_quirk_skip_gpio_event_handlers); #endif /* Lists of PMIC ACPI HIDs with an (often better) native charger driver */ diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index d8a421ce26a8..31ae0adbb295 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -536,6 +536,9 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; + if (acpi_quirk_skip_gpio_event_handlers()) + return; + acpi_walk_resources(handle, METHOD_NAME__AEI, acpi_gpiochip_alloc_event, acpi_gpio); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0584e9f6e339..57acb895c038 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -657,6 +657,7 @@ static inline bool acpi_quirk_skip_acpi_ac_and_battery(void) #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev); int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip); +bool acpi_quirk_skip_gpio_event_handlers(void); #else static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev) { @@ -668,6 +669,10 @@ acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip) *skip = false; return 0; } +static inline bool acpi_quirk_skip_gpio_event_handlers(void) +{ + return false; +} #endif #ifdef CONFIG_PM -- cgit From a5cb0695c5f0ac2ab0cedf2c1c0d75826cb73448 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2023 11:04:35 +0100 Subject: ACPI: x86: Add skip i2c clients quirk for Acer Iconia One 7 B1-750 The Acer Iconia One 7 B1-750 is a x86 tablet which ships with Android x86 as factory OS. The Android x86 kernel fork ignores I2C devices described in the DSDT, except for the PMIC and Audio codecs. As usual the Acer Iconia One 7 B1-750's DSDT contains a bunch of extra I2C devices which are not actually there, causing various resource conflicts. Add an ACPI_QUIRK_SKIP_I2C_CLIENTS quirk for the Acer Iconia One 7 B1-750 to the acpi_quirk_skip_dmi_ids table to woraround this. The DSDT also contains broken ACPI GPIO event handlers, disable those too. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index 4bf57cce30bb..b2b0e2701333 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -280,6 +280,16 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { * need the x86-android-tablets module to properly work. */ #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS) + { + /* Acer Iconia One 7 B1-750 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "VESPA2"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | + ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), + }, { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -- cgit From 1a1e7540cf501dd5c8b57a577a155cdd13c7e202 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2023 11:04:36 +0100 Subject: ACPI: x86: Add skip i2c clients quirk for Lenovo Yoga Book X90 The Lenovo Yoga Book X90 is a x86 tablet which ships with Android x86 as factory OS. The Android x86 kernel fork ignores I2C devices described in the DSDT, except for the PMIC and Audio codecs. As usual the Lenovo Yoga Book X90's DSDT contains a bunch of extra I2C devices which are not actually there, causing various resource conflicts. Add an ACPI_QUIRK_SKIP_I2C_CLIENTS quirk for the Lenovo Yoga Book X90 to the acpi_quirk_skip_dmi_ids table to woraround this. The DSDT also contains broken ACPI GPIO event handlers, disable those too. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index b2b0e2701333..da5727069d85 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -300,6 +300,17 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = { ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), }, + { + /* Lenovo Yoga Book X90F/L */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), + }, + .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS | + ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY | + ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS), + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -- cgit From a659e35ca0af2765f567bdfdccfa247eff0cdab8 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 7 Mar 2023 11:39:11 +0200 Subject: ASoC: SOF: Intel: MTL: Fix the device description Add the missing ops_free callback. Fixes: 064520e8aeaa ("ASoC: SOF: Intel: Add support for MeteorLake (MTL)") Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307093914.25409-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-mtl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/intel/pci-mtl.c b/sound/soc/sof/intel/pci-mtl.c index 6e4e6d4ef5a5..b183dc0014b4 100644 --- a/sound/soc/sof/intel/pci-mtl.c +++ b/sound/soc/sof/intel/pci-mtl.c @@ -46,6 +46,7 @@ static const struct sof_dev_desc mtl_desc = { .nocodec_tplg_filename = "sof-mtl-nocodec.tplg", .ops = &sof_mtl_ops, .ops_init = sof_mtl_ops_init, + .ops_free = hda_ops_free, }; /* PCI IDs */ -- cgit From 9eb2b4cac223095d2079a6d52b8bbddc6e064288 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 7 Mar 2023 11:39:12 +0200 Subject: ASoC: SOF: Intel: HDA: Fix device description Add the missing ops_free callback for APL/CNL/CML/JSL/TGL/EHL platforms. Fixes: 1da51943725f ("ASoC: SOF: Intel: hda: init NHLT for IPC4") Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307093914.25409-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-apl.c | 1 + sound/soc/sof/intel/pci-cnl.c | 2 ++ sound/soc/sof/intel/pci-icl.c | 1 + sound/soc/sof/intel/pci-tgl.c | 5 +++++ 4 files changed, 9 insertions(+) diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c index 69279dcc92dc..aff6cb573c27 100644 --- a/sound/soc/sof/intel/pci-apl.c +++ b/sound/soc/sof/intel/pci-apl.c @@ -78,6 +78,7 @@ static const struct sof_dev_desc glk_desc = { .nocodec_tplg_filename = "sof-glk-nocodec.tplg", .ops = &sof_apl_ops, .ops_init = sof_apl_ops_init, + .ops_free = hda_ops_free, }; /* PCI IDs */ diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c index 8db3f8d15b55..4c0c1c369dcd 100644 --- a/sound/soc/sof/intel/pci-cnl.c +++ b/sound/soc/sof/intel/pci-cnl.c @@ -48,6 +48,7 @@ static const struct sof_dev_desc cnl_desc = { .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", .ops = &sof_cnl_ops, .ops_init = sof_cnl_ops_init, + .ops_free = hda_ops_free, }; static const struct sof_dev_desc cfl_desc = { @@ -111,6 +112,7 @@ static const struct sof_dev_desc cml_desc = { .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", .ops = &sof_cnl_ops, .ops_init = sof_cnl_ops_init, + .ops_free = hda_ops_free, }; /* PCI IDs */ diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c index d6cf75e357db..6785669113b3 100644 --- a/sound/soc/sof/intel/pci-icl.c +++ b/sound/soc/sof/intel/pci-icl.c @@ -79,6 +79,7 @@ static const struct sof_dev_desc jsl_desc = { .nocodec_tplg_filename = "sof-jsl-nocodec.tplg", .ops = &sof_cnl_ops, .ops_init = sof_cnl_ops_init, + .ops_free = hda_ops_free, }; /* PCI IDs */ diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index e80c4dfef85a..adc7314a1b57 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -48,6 +48,7 @@ static const struct sof_dev_desc tgl_desc = { .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", .ops = &sof_tgl_ops, .ops_init = sof_tgl_ops_init, + .ops_free = hda_ops_free, }; static const struct sof_dev_desc tglh_desc = { @@ -110,6 +111,7 @@ static const struct sof_dev_desc ehl_desc = { .nocodec_tplg_filename = "sof-ehl-nocodec.tplg", .ops = &sof_tgl_ops, .ops_init = sof_tgl_ops_init, + .ops_free = hda_ops_free, }; static const struct sof_dev_desc adls_desc = { @@ -141,6 +143,7 @@ static const struct sof_dev_desc adls_desc = { .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, .ops_init = sof_tgl_ops_init, + .ops_free = hda_ops_free, }; static const struct sof_dev_desc adl_desc = { @@ -172,6 +175,7 @@ static const struct sof_dev_desc adl_desc = { .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, .ops_init = sof_tgl_ops_init, + .ops_free = hda_ops_free, }; static const struct sof_dev_desc adl_n_desc = { @@ -203,6 +207,7 @@ static const struct sof_dev_desc adl_n_desc = { .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, .ops_init = sof_tgl_ops_init, + .ops_free = hda_ops_free, }; static const struct sof_dev_desc rpls_desc = { -- cgit From 1f320bdb29b644a2c9fb301a6fb2d6170e6417e9 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 7 Mar 2023 11:39:13 +0200 Subject: ASoC: SOF: Intel: SKL: Fix device description Add missing ops_free callback for SKL/KBL platforms. Fixes: 52d7939d10f2 ("ASoC: SOF: Intel: add ops for SKL/KBL") Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307093914.25409-4-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-skl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-skl.c b/sound/soc/sof/intel/pci-skl.c index 3a99dc444f92..5b4bccf81965 100644 --- a/sound/soc/sof/intel/pci-skl.c +++ b/sound/soc/sof/intel/pci-skl.c @@ -38,6 +38,7 @@ static struct sof_dev_desc skl_desc = { .nocodec_tplg_filename = "sof-skl-nocodec.tplg", .ops = &sof_skl_ops, .ops_init = sof_skl_ops_init, + .ops_free = hda_ops_free, }; static struct sof_dev_desc kbl_desc = { @@ -61,6 +62,7 @@ static struct sof_dev_desc kbl_desc = { .nocodec_tplg_filename = "sof-kbl-nocodec.tplg", .ops = &sof_skl_ops, .ops_init = sof_skl_ops_init, + .ops_free = hda_ops_free, }; /* PCI IDs */ -- cgit From 376f79bbf521fc37b871b536276319951b5bef3a Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 7 Mar 2023 11:39:14 +0200 Subject: ASOC: SOF: Intel: pci-tgl: Fix device description Add the missing ops_free callback. Fixes: 63d375b9f2a9 ("ASoC: SOF: Intel: pci-tgl: use RPL specific firmware definitions") Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307093914.25409-5-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index adc7314a1b57..22e769e0831d 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -239,6 +239,7 @@ static const struct sof_dev_desc rpls_desc = { .nocodec_tplg_filename = "sof-rpl-nocodec.tplg", .ops = &sof_tgl_ops, .ops_init = sof_tgl_ops_init, + .ops_free = hda_ops_free, }; static const struct sof_dev_desc rpl_desc = { @@ -270,6 +271,7 @@ static const struct sof_dev_desc rpl_desc = { .nocodec_tplg_filename = "sof-rpl-nocodec.tplg", .ops = &sof_tgl_ops, .ops_init = sof_tgl_ops_init, + .ops_free = hda_ops_free, }; /* PCI IDs */ -- cgit From 989a3e4479177d0f4afab8be1960731bc0ffbbd0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 7 Mar 2023 13:49:17 +0200 Subject: ASoC: SOF: ipc3: Check for upper size limit for the received message The sof_ipc3_rx_msg() checks for minimum size of a new rx message but it is missing the check for upper limit. Corrupted or compromised firmware might be able to take advantage of this to cause out of bounds reads outside of the message area. Reported-by: Curtis Malainey Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Curtis Malainey Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307114917.5124-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c index 3de64ea2dc9a..4493bbd7faf1 100644 --- a/sound/soc/sof/ipc3.c +++ b/sound/soc/sof/ipc3.c @@ -970,8 +970,9 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev) return; } - if (hdr.size < sizeof(hdr)) { - dev_err(sdev->dev, "The received message size is invalid\n"); + if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) { + dev_err(sdev->dev, "The received message size is invalid: %u\n", + hdr.size); return; } -- cgit From 9e269e3aa9006440de639597079ee7140ef5b5f3 Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Tue, 7 Mar 2023 13:07:51 +0200 Subject: ASoC: SOF: ipc4-topology: Fix incorrect sample rate print unit This patch fixes the sample rate print unit from KHz to Hz. E.g. 48000KHz becomes 48000Hz. Signed-off-by: Seppo Ingalsuo Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307110751.2053-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index 3e27c7a48ebd..dc44ba2ec71c 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -155,7 +155,7 @@ static void sof_ipc4_dbg_audio_format(struct device *dev, for (i = 0; i < num_format; i++, ptr = (u8 *)ptr + object_size) { fmt = ptr; dev_dbg(dev, - " #%d: %uKHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n", + " #%d: %uHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n", i, fmt->sampling_frequency, fmt->bit_depth, fmt->ch_map, fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg); } -- cgit From 858a438a6cf919e5727d2a0f5f3f0e68b2d5354e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 7 Mar 2023 12:07:33 +0200 Subject: ASoC: Intel: soc-acpi: fix copy-paste issue in topology names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason the convention for topology names was not followed and the name inspired by another unrelated hardware configuration. As a result, the kernel will request a non-existent topology file. Link: https://github.com/thesofproject/sof/pull/6878 Fixes: 2ec8b081d59f ("ASoC: Intel: soc-acpi: Add entry for sof_es8336 in ADL match table") Cc: stable@vger.kernel.org Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307100733.15025-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-adl-match.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 56ee5fef66a8..28dd2046e4ac 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -559,7 +559,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { { .comp_ids = &essx_83x6, .drv_name = "sof-essx8336", - .sof_tplg_filename = "sof-adl-es83x6", /* the tplg suffix is added at run time */ + .sof_tplg_filename = "sof-adl-es8336", /* the tplg suffix is added at run time */ .tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER | SND_SOC_ACPI_TPLG_INTEL_SSP_MSB | SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER, -- cgit From 6ba8ddf86a3ada463e9952a19b069f978a70a748 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 7 Mar 2023 13:48:15 +0200 Subject: ASoC: SOF: topology: Fix error handling in sof_widget_ready() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the error paths in sof_widget_ready() to free all allocated memory and prevent memory leaks. Signed-off-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307114815.4909-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/topology.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 4a62ccc71fcb..9f3a038fe21a 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -1388,14 +1388,15 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, if (ret < 0) { dev_err(scomp->dev, "failed to parse component pin tokens for %s\n", w->name); - return ret; + goto widget_free; } if (swidget->num_sink_pins > SOF_WIDGET_MAX_NUM_PINS || swidget->num_source_pins > SOF_WIDGET_MAX_NUM_PINS) { dev_err(scomp->dev, "invalid pins for %s: [sink: %d, src: %d]\n", swidget->widget->name, swidget->num_sink_pins, swidget->num_source_pins); - return -EINVAL; + ret = -EINVAL; + goto widget_free; } if (swidget->num_sink_pins > 1) { @@ -1404,7 +1405,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, if (ret < 0) { dev_err(scomp->dev, "failed to parse sink pin binding for %s\n", w->name); - return ret; + goto widget_free; } } @@ -1414,7 +1415,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, if (ret < 0) { dev_err(scomp->dev, "failed to parse source pin binding for %s\n", w->name); - return ret; + goto widget_free; } } @@ -1436,9 +1437,8 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, case snd_soc_dapm_dai_out: dai = kzalloc(sizeof(*dai), GFP_KERNEL); if (!dai) { - kfree(swidget); - return -ENOMEM; - + ret = -ENOMEM; + goto widget_free; } ret = sof_widget_parse_tokens(scomp, swidget, tw, token_list, token_list_size); @@ -1496,8 +1496,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, tw->shift, swidget->id, tw->name, strnlen(tw->sname, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) > 0 ? tw->sname : "none"); - kfree(swidget); - return ret; + goto widget_free; } if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) { @@ -1518,10 +1517,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, if (ret) { dev_err(scomp->dev, "widget event binding failed for %s\n", swidget->widget->name); - kfree(swidget->private); - kfree(swidget->tuples); - kfree(swidget); - return ret; + goto free; } } } @@ -1532,10 +1528,8 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, spipe = kzalloc(sizeof(*spipe), GFP_KERNEL); if (!spipe) { - kfree(swidget->private); - kfree(swidget->tuples); - kfree(swidget); - return -ENOMEM; + ret = -ENOMEM; + goto free; } spipe->pipe_widget = swidget; @@ -1546,6 +1540,12 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, w->dobj.private = swidget; list_add(&swidget->list, &sdev->widget_list); return ret; +free: + kfree(swidget->private); + kfree(swidget->tuples); +widget_free: + kfree(swidget); + return ret; } static int sof_route_unload(struct snd_soc_component *scomp, -- cgit From ca09e2a351fbc7836ba9418304ff0c3e72addfe0 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 7 Mar 2023 11:53:41 +0200 Subject: ASoC: SOF: Intel: pci-tng: revert invalid bar size setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic for the ioremap is to find the resource index 3 (IRAM) and infer the BAR address by subtracting the IRAM offset. The BAR size defined in hardware specifications is 2MB. The commit 5947b2726beb6 ("ASoC: SOF: Intel: Check the bar size before remapping") tried to find the BAR size by querying the resource length instead of a pre-canned value, but by requesting the size for index 3 it only gets the size of the IRAM. That's obviously wrong and prevents the probe from proceeding. This commit attempted to fix an issue in a fuzzing/simulated environment but created another on actual devices, so the best course of action is to revert that change. Reported-by: Ferry Toth Tested-by: Ferry Toth (Intel Edison-Arduino) Link: https://github.com/thesofproject/linux/issues/3901 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307095341.3222-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tng.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c index 5b2b409752c5..8c22a00266c0 100644 --- a/sound/soc/sof/intel/pci-tng.c +++ b/sound/soc/sof/intel/pci-tng.c @@ -75,11 +75,7 @@ static int tangier_pci_probe(struct snd_sof_dev *sdev) /* LPE base */ base = pci_resource_start(pci, desc->resindex_lpe_base) - IRAM_OFFSET; - size = pci_resource_len(pci, desc->resindex_lpe_base); - if (size < PCI_BAR_SIZE) { - dev_err(sdev->dev, "error: I/O region is too small.\n"); - return -ENODEV; - } + size = PCI_BAR_SIZE; dev_dbg(sdev->dev, "LPE PHY base at 0x%x size 0x%x", base, size); sdev->bar[DSP_BAR] = devm_ioremap(sdev->dev, base, size); -- cgit From b66bfc3a9810caed5d55dd8907110bdc8028b06b Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Tue, 7 Mar 2023 13:46:39 +0200 Subject: ASoC: SOF: sof-audio: Fix broken early bclk feature for SSP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the removal of widget setup during BE hw_params, the DAI config IPC is never sent with the SOF_DAI_CONFIG_FLAGS_HW_PARAMS. This means that the early bit clock feature required for certain codecs will be broken. Fix this by saving the config flags sent during BE DAI hw_params and reusing it when the DAI_CONFIG IPC is sent after the DAI widget is set up. Also, free the DAI config before the widget is freed. The DAI_CONFIG IPC sent during the sof_widget_free() does not have the DAI index information. So, save the dai_index in the config during hw_params and reuse it during hw_free. For IPC4, do not clear the node ID during hw_free. It will be needed for freeing the group_ida during unprepare. Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307114639.4553-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc3-topology.c | 32 ++++++++++++++++++++++++++++++-- sound/soc/sof/ipc4-topology.c | 15 +++++++++++++-- sound/soc/sof/sof-audio.c | 28 +++++++++++++++++++++++++--- 3 files changed, 68 insertions(+), 7 deletions(-) diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index dceb78bfe17c..b1f425b39db9 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -2081,7 +2081,9 @@ static int sof_ipc3_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget * break; case SOF_DAI_INTEL_ALH: if (data) { - config->dai_index = data->dai_index; + /* save the dai_index during hw_params and reuse it for hw_free */ + if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) + config->dai_index = data->dai_index; config->alh.stream_id = data->dai_data; } break; @@ -2089,7 +2091,30 @@ static int sof_ipc3_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget * break; } - config->flags = flags; + /* + * The dai_config op is invoked several times and the flags argument varies as below: + * BE DAI hw_params: When the op is invoked during the BE DAI hw_params, flags contains + * SOF_DAI_CONFIG_FLAGS_HW_PARAMS along with quirks + * FE DAI hw_params: When invoked during FE DAI hw_params after the DAI widget has + * just been set up in the DSP, flags is set to SOF_DAI_CONFIG_FLAGS_HW_PARAMS with no + * quirks + * BE DAI trigger: When invoked during the BE DAI trigger, flags is set to + * SOF_DAI_CONFIG_FLAGS_PAUSE and contains no quirks + * BE DAI hw_free: When invoked during the BE DAI hw_free, flags is set to + * SOF_DAI_CONFIG_FLAGS_HW_FREE and contains no quirks + * FE DAI hw_free: When invoked during the FE DAI hw_free, flags is set to + * SOF_DAI_CONFIG_FLAGS_HW_FREE and contains no quirks + * + * The DAI_CONFIG IPC is sent to the DSP, only after the widget is set up during the FE + * DAI hw_params. But since the BE DAI hw_params precedes the FE DAI hw_params, the quirks + * need to be preserved when assigning the flags before sending the IPC. + * For the case of PAUSE/HW_FREE, since there are no quirks, flags can be used as is. + */ + + if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) + config->flags |= flags; + else + config->flags = flags; /* only send the IPC if the widget is set up in the DSP */ if (swidget->use_count > 0) { @@ -2097,6 +2122,9 @@ static int sof_ipc3_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget * &reply, sizeof(reply)); if (ret < 0) dev_err(sdev->dev, "Failed to set dai config for %s\n", dai->name); + + /* clear the flags once the IPC has been sent even if it fails */ + config->flags = SOF_DAI_CONFIG_FLAGS_NONE; } return ret; diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index dc44ba2ec71c..ae02cc152f87 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -980,6 +980,7 @@ static void sof_ipc4_unprepare_copier_module(struct snd_sof_widget *swidget) ipc4_copier = dai->private; if (ipc4_copier->dai_type == SOF_DAI_INTEL_ALH) { + struct sof_ipc4_copier_data *copier_data = &ipc4_copier->data; struct sof_ipc4_alh_configuration_blob *blob; unsigned int group_id; @@ -989,6 +990,9 @@ static void sof_ipc4_unprepare_copier_module(struct snd_sof_widget *swidget) ALH_MULTI_GTW_BASE; ida_free(&alh_group_ida, group_id); } + + /* clear the node ID */ + copier_data->gtw_cfg.node_id &= ~SOF_IPC4_NODE_INDEX_MASK; } } @@ -1940,8 +1944,15 @@ static int sof_ipc4_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget * pipeline->skip_during_fe_trigger = true; fallthrough; case SOF_DAI_INTEL_ALH: - copier_data->gtw_cfg.node_id &= ~SOF_IPC4_NODE_INDEX_MASK; - copier_data->gtw_cfg.node_id |= SOF_IPC4_NODE_INDEX(data->dai_data); + /* + * Do not clear the node ID when this op is invoked with + * SOF_DAI_CONFIG_FLAGS_HW_FREE. It is needed to free the group_ida during + * unprepare. + */ + if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) { + copier_data->gtw_cfg.node_id &= ~SOF_IPC4_NODE_INDEX_MASK; + copier_data->gtw_cfg.node_id |= SOF_IPC4_NODE_INDEX(data->dai_data); + } break; case SOF_DAI_INTEL_DMIC: case SOF_DAI_INTEL_SSP: diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index 760621bfc802..d7df29f2ada8 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -50,9 +50,27 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev, /* reset route setup status for all routes that contain this widget */ sof_reset_route_setup_status(sdev, swidget); + /* free DAI config and continue to free widget even if it fails */ + if (WIDGET_IS_DAI(swidget->id)) { + struct snd_sof_dai_config_data data; + unsigned int flags = SOF_DAI_CONFIG_FLAGS_HW_FREE; + + data.dai_data = DMA_CHAN_INVALID; + + if (tplg_ops && tplg_ops->dai_config) { + err = tplg_ops->dai_config(sdev, swidget, flags, &data); + if (err < 0) + dev_err(sdev->dev, "failed to free config for widget %s\n", + swidget->widget->name); + } + } + /* continue to disable core even if IPC fails */ - if (tplg_ops && tplg_ops->widget_free) - err = tplg_ops->widget_free(sdev, swidget); + if (tplg_ops && tplg_ops->widget_free) { + ret = tplg_ops->widget_free(sdev, swidget); + if (ret < 0 && !err) + err = ret; + } /* * disable widget core. continue to route setup status and complete flag @@ -151,8 +169,12 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev, /* send config for DAI components */ if (WIDGET_IS_DAI(swidget->id)) { - unsigned int flags = SOF_DAI_CONFIG_FLAGS_NONE; + unsigned int flags = SOF_DAI_CONFIG_FLAGS_HW_PARAMS; + /* + * The config flags saved during BE DAI hw_params will be used for IPC3. IPC4 does + * not use the flags argument. + */ if (tplg_ops && tplg_ops->dai_config) { ret = tplg_ops->dai_config(sdev, swidget, flags, NULL); if (ret < 0) -- cgit From c99e48f4ce9b986ab7992ec7283a06dae875f668 Mon Sep 17 00:00:00 2001 From: Jaska Uimonen Date: Tue, 7 Mar 2023 13:07:30 +0200 Subject: ASoC: SOF: ipc4-topology: set dmic dai index from copier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dmic dai index was set incorrectly to bits 5-7, when it is actually using just the lowest 3. Fix the macro for setting the bits. Fixes: aa84ffb72158 ("ASoC: SOF: ipc4-topology: Add support for SSP/DMIC DAI's") Signed-off-by: Jaska Uimonen Reviewed-by: Adrian Bonislawski Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307110730.1995-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h index 72529179ac22..c0e457f7f51a 100644 --- a/sound/soc/sof/ipc4-topology.h +++ b/sound/soc/sof/ipc4-topology.h @@ -46,7 +46,7 @@ #define SOF_IPC4_NODE_INDEX_INTEL_SSP(x) (((x) & 0xf) << 4) /* Node ID for DMIC type DAI copiers */ -#define SOF_IPC4_NODE_INDEX_INTEL_DMIC(x) (((x) & 0x7) << 5) +#define SOF_IPC4_NODE_INDEX_INTEL_DMIC(x) ((x) & 0x7) #define SOF_IPC4_GAIN_ALL_CHANNELS_MASK 0xffffffff #define SOF_IPC4_VOL_ZERO_DB 0x7fffffff -- cgit From 52a55779ed14792a150421339664193d6eb8e036 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Tue, 7 Mar 2023 11:54:52 +0200 Subject: ASoC: SOF: Intel: hda-dsp: harden D0i3 programming sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add delay between set and wait command according to hardware programming sequence. Also add debug log to detect error. Signed-off-by: Rander Wang Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307095453.3719-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-dsp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 68eb06f13a1f..a6f2822401e0 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -392,6 +392,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value) snd_sof_dsp_update8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset, SOF_HDA_VS_D0I3C_I3, value); + /* + * The value written to the D0I3C::I3 bit may not be taken into account immediately. + * A delay is recommended before checking if D0I3C::CIP is cleared + */ + usleep_range(30, 40); + /* Wait for cmd in progress to be cleared before exiting the function */ ret = hda_dsp_wait_d0i3c_done(sdev); if (ret < 0) { @@ -400,6 +406,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value) } reg = snd_sof_dsp_read8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset); + /* Confirm d0i3 state changed with paranoia check */ + if ((reg ^ value) & SOF_HDA_VS_D0I3C_I3) { + dev_err(sdev->dev, "failed to update D0I3C!\n"); + return -EIO; + } + trace_sof_intel_D0I3C_updated(sdev, reg); return 0; -- cgit From 8bac40b8ed17ab1be9133e9620f65fae80262b7e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 7 Mar 2023 11:54:12 +0200 Subject: ASoC: SOF: Intel: hda-ctrl: re-add sleep after entering and exiting reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a09d82ce0a867 ("ASoC: SOF: Intel: hda-ctrl: remove useless sleep") It was a mistake to remove those delays, in light of comments in the HDaudio spec captured in snd_hdac_bus_reset_link() that the codec needs time for its initialization and PLL lock. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Reviewed-by: Rander Wang Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307095412.3416-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-ctrl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c index 3aea36c077c9..f3bdeba28412 100644 --- a/sound/soc/sof/intel/hda-ctrl.c +++ b/sound/soc/sof/intel/hda-ctrl.c @@ -196,12 +196,15 @@ int hda_dsp_ctrl_init_chip(struct snd_sof_dev *sdev) goto err; } + usleep_range(500, 1000); + /* exit HDA controller reset */ ret = hda_dsp_ctrl_link_reset(sdev, false); if (ret < 0) { dev_err(sdev->dev, "error: failed to exit HDA controller reset\n"); goto err; } + usleep_range(1000, 1200); hda_codec_detect_mask(sdev); -- cgit From c7e328f1cbf22efe23bc3cd7dd6bb14efccc28d0 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 7 Mar 2023 13:46:59 +0200 Subject: ASoC: SOF: sof-audio: don't squelch errors in WIDGET_SETUP phase When an IPC error happens while setting-up a widget during the FE hw_params phase, the existing logic will unwind all previous configurations but will overwrite the return status. The ALSA/ASoC logic will then proceed with the prepare and trigger phases, even though the firmware resources are not available. Fix by returning the initial error code and ignoring the code returned in the UNPREPARE phase. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Chao Song Reviewed-by: Bard Liao Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307114659.4614-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-audio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index d7df29f2ada8..6de388a8d0b8 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -610,8 +610,8 @@ int sof_widget_list_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm, ret = sof_walk_widgets_in_order(sdev, spcm, fe_params, platform_params, dir, SOF_WIDGET_SETUP); if (ret < 0) { - ret = sof_walk_widgets_in_order(sdev, spcm, fe_params, platform_params, - dir, SOF_WIDGET_UNPREPARE); + sof_walk_widgets_in_order(sdev, spcm, fe_params, platform_params, + dir, SOF_WIDGET_UNPREPARE); return ret; } -- cgit From e45cd86c3a78bfb9875a5eb8ab5dab459b59bbe2 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Tue, 7 Mar 2023 13:06:56 +0200 Subject: ASoC: SOF: IPC4: update gain ipc msg definition to align with fw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent firmware changes modified the curve duration from 32 to 64 bits, which breaks volume ramps. A simple solution would be to change the definition, but unfortunately the ASoC topology framework only supports up to 32 bit tokens. This patch suggests breaking the 64 bit value in low and high parts, with only the low-part extracted from topology and high-part only zeroes. Since the curve duration is represented in hundred of nanoseconds, we can still represent a 400s ramp, which is just fine. The defacto ABI change has no effect on existing users since the IPC4 firmware has not been released just yet. Link: https://github.com/thesofproject/linux/issues/4026 Signed-off-by: Rander Wang Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230307110656.1816-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-control.c | 3 ++- sound/soc/sof/ipc4-topology.c | 4 ++-- sound/soc/sof/ipc4-topology.h | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c index 67bd2233fd9a..9a71af1a613a 100644 --- a/sound/soc/sof/ipc4-control.c +++ b/sound/soc/sof/ipc4-control.c @@ -97,7 +97,8 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge } /* set curve type and duration from topology */ - data.curve_duration = gain->data.curve_duration; + data.curve_duration_l = gain->data.curve_duration_l; + data.curve_duration_h = gain->data.curve_duration_h; data.curve_type = gain->data.curve_type; msg->data_ptr = &data; diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index ae02cc152f87..a623707c8ffc 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -107,7 +107,7 @@ static const struct sof_topology_token gain_tokens[] = { get_token_u32, offsetof(struct sof_ipc4_gain_data, curve_type)}, {SOF_TKN_GAIN_RAMP_DURATION, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32, - offsetof(struct sof_ipc4_gain_data, curve_duration)}, + offsetof(struct sof_ipc4_gain_data, curve_duration_l)}, {SOF_TKN_GAIN_VAL, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32, offsetof(struct sof_ipc4_gain_data, init_val)}, }; @@ -692,7 +692,7 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget) dev_dbg(scomp->dev, "pga widget %s: ramp type: %d, ramp duration %d, initial gain value: %#x, cpc %d\n", - swidget->widget->name, gain->data.curve_type, gain->data.curve_duration, + swidget->widget->name, gain->data.curve_type, gain->data.curve_duration_l, gain->data.init_val, gain->base_config.cpc); ret = sof_ipc4_widget_setup_msg(swidget, &gain->msg); diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h index c0e457f7f51a..123f1096f326 100644 --- a/sound/soc/sof/ipc4-topology.h +++ b/sound/soc/sof/ipc4-topology.h @@ -277,14 +277,16 @@ struct sof_ipc4_control_data { * @init_val: Initial value * @curve_type: Curve type * @reserved: reserved for future use - * @curve_duration: Curve duration + * @curve_duration_l: Curve duration low part + * @curve_duration_h: Curve duration high part */ struct sof_ipc4_gain_data { uint32_t channels; uint32_t init_val; uint32_t curve_type; uint32_t reserved; - uint32_t curve_duration; + uint32_t curve_duration_l; + uint32_t curve_duration_h; } __aligned(8); /** -- cgit From c22f2ff8724b49dce2ae797e9fbf4bc0fa91112f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 11:32:42 +0100 Subject: drm/sun4i: fix missing component unbind on bind errors Make sure to unbind all subcomponents when binding the aggregate device fails. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Cc: stable@vger.kernel.org # 4.7 Cc: Maxime Ripard Signed-off-by: Johan Hovold Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230306103242.4775-1-johan+linaro@kernel.org --- drivers/gpu/drm/sun4i/sun4i_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index cc94efbbf2d4..d6c741716167 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -95,12 +95,12 @@ static int sun4i_drv_bind(struct device *dev) /* drm_vblank_init calls kcalloc, which can fail */ ret = drm_vblank_init(drm, drm->mode_config.num_crtc); if (ret) - goto cleanup_mode_config; + goto unbind_all; /* Remove early framebuffers (ie. simplefb) */ ret = drm_aperture_remove_framebuffers(false, &sun4i_drv_driver); if (ret) - goto cleanup_mode_config; + goto unbind_all; sun4i_framebuffer_init(drm); @@ -119,6 +119,8 @@ static int sun4i_drv_bind(struct device *dev) finish_poll: drm_kms_helper_poll_fini(drm); +unbind_all: + component_unbind_all(dev, NULL); cleanup_mode_config: drm_mode_config_cleanup(drm); of_reserved_mem_device_release(dev); -- cgit From 5d89176af1ae201c01c10a89b68b27cfc683b76c Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Mon, 27 Feb 2023 18:59:41 +0800 Subject: sched/doc: supplement CPU capacity with RISC-V This commit 7d2078310cbf ("dt-bindings: arm: move cpu-capacity to a shared loation") updates some references about capacity-dmips-mhz property in this document. The list of architectures using capacity-dmips-mhz omits RISC-V, so supplements it here. Signed-off-by: Song Shuai Reviewed-by: Palmer Dabbelt # English Acked-by: Palmer Dabbelt Reviewed-by: Alex Shi Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230227105941.2749193-1-suagrfillet@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/scheduler/sched-capacity.rst | 2 +- Documentation/translations/zh_CN/scheduler/sched-capacity.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/scheduler/sched-capacity.rst b/Documentation/scheduler/sched-capacity.rst index 8e2b8538bc2b..e2c1cf743158 100644 --- a/Documentation/scheduler/sched-capacity.rst +++ b/Documentation/scheduler/sched-capacity.rst @@ -258,7 +258,7 @@ Linux cannot currently figure out CPU capacity on its own, this information thus needs to be handed to it. Architectures must define arch_scale_cpu_capacity() for that purpose. -The arm and arm64 architectures directly map this to the arch_topology driver +The arm, arm64, and RISC-V architectures directly map this to the arch_topology driver CPU scaling data, which is derived from the capacity-dmips-mhz CPU binding; see Documentation/devicetree/bindings/cpu/cpu-capacity.txt. diff --git a/Documentation/translations/zh_CN/scheduler/sched-capacity.rst b/Documentation/translations/zh_CN/scheduler/sched-capacity.rst index e07ffdd391d3..8cba135dcd1a 100644 --- a/Documentation/translations/zh_CN/scheduler/sched-capacity.rst +++ b/Documentation/translations/zh_CN/scheduler/sched-capacity.rst @@ -231,7 +231,7 @@ CFS调度类基于实体负载跟踪机制(Per-Entity Load Tracking, PELT) 当前,Linux无法凭自身算出CPU算力,因此必须要有把这个信息传递给Linux的方式。每个架构必须为此 定义arch_scale_cpu_capacity()函数。 -arm和arm64架构直接把这个信息映射到arch_topology驱动的CPU scaling数据中(译注:参考 +arm、arm64和RISC-V架构直接把这个信息映射到arch_topology驱动的CPU scaling数据中(译注:参考 arch_topology.h的percpu变量cpu_scale),它是从capacity-dmips-mhz CPU binding中衍生计算 出来的。参见Documentation/devicetree/bindings/cpu/cpu-capacity.txt。 -- cgit From 74596085796fae0cfce3e42ee46bf4f8acbdac55 Mon Sep 17 00:00:00 2001 From: Glenn Washburn Date: Mon, 27 Feb 2023 12:40:42 -0600 Subject: docs: Correct missing "d_" prefix for dentry_operations member d_weak_revalidate The details for struct dentry_operations member d_weak_revalidate is missing a "d_" prefix. Fixes: af96c1e304f7 ("docs: filesystems: vfs: Convert vfs.txt to RST") Signed-off-by: Glenn Washburn Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20230227184042.2375235-1-development@efficientek.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/vfs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index c53f30251a66..f3b344f0c0a4 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1222,7 +1222,7 @@ defined: return -ECHILD and it will be called again in ref-walk mode. -``_weak_revalidate`` +``d_weak_revalidate`` called when the VFS needs to revalidate a "jumped" dentry. This is called when a path-walk ends at dentry that was not acquired by doing a lookup in the parent directory. This includes "/", -- cgit From 38484a1d0c50596c8080a00c269466f60fa4a051 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 6 Mar 2023 20:17:11 +0100 Subject: docs: programming-language: remove mention of the Intel compiler The Intel compiler support has been removed in commit 95207db8166a ("Remove Intel compiler support"). Thus remove its mention in the Documentation too. Signed-off-by: Miguel Ojeda Reviewed-by: Vincenzo Palazzo Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230306191712.230658-1-ojeda@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/process/programming-language.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst index 5fc9160ca1fa..10dc772671d8 100644 --- a/Documentation/process/programming-language.rst +++ b/Documentation/process/programming-language.rst @@ -12,10 +12,6 @@ under ``-std=gnu11`` [gcc-c-dialect-options]_: the GNU dialect of ISO C11. This dialect contains many extensions to the language [gnu-extensions]_, and many of them are used within the kernel as a matter of course. -There is some support for compiling the kernel with ``icc`` [icc]_ for several -of the architectures, although at the time of writing it is not completed, -requiring third-party patches. - Attributes ---------- @@ -38,7 +34,6 @@ Please refer to ``include/linux/compiler_attributes.h`` for more information. .. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards .. [gcc] https://gcc.gnu.org .. [clang] https://clang.llvm.org -.. [icc] https://software.intel.com/en-us/c-compilers .. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html .. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html .. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html -- cgit From 0b02076f995332fe1e457da29dd61c3b66c862f7 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 6 Mar 2023 20:17:12 +0100 Subject: docs: programming-language: add Rust programming language section Following the C text in the file, add a mention about the Rust programming language, the currently supported compiler and the edition used (similar to the "dialect" mention for C). Similarly, add a mention about the unstable features used (similar to the "extensions" mentions for C). In addition, add some links to complement the information. Signed-off-by: Miguel Ojeda Link: https://lore.kernel.org/r/20230306191712.230658-2-ojeda@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/process/programming-language.rst | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst index 10dc772671d8..bc56dee6d0bc 100644 --- a/Documentation/process/programming-language.rst +++ b/Documentation/process/programming-language.rst @@ -31,6 +31,20 @@ in order to feature detect which ones can be used and/or to shorten the code. Please refer to ``include/linux/compiler_attributes.h`` for more information. +Rust +---- + +The kernel has experimental support for the Rust programming language +[rust-language]_ under ``CONFIG_RUST``. It is compiled with ``rustc`` [rustc]_ +under ``--edition=2021`` [rust-editions]_. Editions are a way to introduce +small changes to the language that are not backwards compatible. + +On top of that, some unstable features [rust-unstable-features]_ are used in +the kernel. Unstable features may change in the future, thus it is an important +goal to reach a point where only stable features are used. + +Please refer to Documentation/rust/index.rst for more information. + .. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards .. [gcc] https://gcc.gnu.org .. [clang] https://clang.llvm.org @@ -38,4 +52,7 @@ Please refer to ``include/linux/compiler_attributes.h`` for more information. .. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html .. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html .. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf - +.. [rust-language] https://www.rust-lang.org +.. [rustc] https://doc.rust-lang.org/rustc/ +.. [rust-editions] https://doc.rust-lang.org/edition-guide/editions/ +.. [rust-unstable-features] https://github.com/Rust-for-Linux/linux/issues/2 -- cgit From a414684e3b735a4114c19295a07e8cb2eb889dae Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 28 Feb 2023 14:46:57 +0100 Subject: docs: rebasing-and-merging: Drop wrong statement about git MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "^0" syntax is no longer needed to fast-forward to a mainline commit; take that out and add --ff-only to force an error if fast-forward is not possible. Signed-off-by: Uwe Kleine-König [jc: rewrote changelog] Link: https://lore.kernel.org/r/20230228134657.1797871-1-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Corbet --- Documentation/maintainer/rebasing-and-merging.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Documentation/maintainer/rebasing-and-merging.rst b/Documentation/maintainer/rebasing-and-merging.rst index 09f988e7fa71..85800ce95ae5 100644 --- a/Documentation/maintainer/rebasing-and-merging.rst +++ b/Documentation/maintainer/rebasing-and-merging.rst @@ -213,11 +213,7 @@ point rather than some random spot. If your upstream-bound branch has emptied entirely into the mainline during the merge window, you can pull it forward with a command like:: - git merge v5.2-rc1^0 - -The "^0" will cause Git to do a fast-forward merge (which should be -possible in this situation), thus avoiding the addition of a spurious merge -commit. + git merge --ff-only v5.2-rc1 The guidelines laid out above are just that: guidelines. There will always be situations that call out for a different solution, and these guidelines -- cgit From 9a9a8fe26751334b7739193a94eba741073b8a55 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Tue, 7 Mar 2023 15:46:15 +0100 Subject: drm/ttm: Fix a NULL pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LRU mechanism may look up a resource in the process of being removed from an object. The locking rules here are a bit unclear but it looks currently like res->bo assignment is protected by the LRU lock, whereas bo->resource is protected by the object lock, while *clearing* of bo->resource is also protected by the LRU lock. This means that if we check that bo->resource points to the LRU resource under the LRU lock we should be safe. So perform that check before deciding to swap out a bo. That avoids dereferencing a NULL bo->resource in ttm_bo_swapout(). Fixes: 6a9b02899402 ("drm/ttm: move the LRU into resource handling v4") Cc: Christian König Cc: Daniel Vetter Cc: Christian Koenig Cc: Huang Rui Cc: Alex Deucher Cc: Felix Kuehling Cc: Philip Yang Cc: Qiang Yu Cc: Matthew Auld Cc: Nirmoy Das Cc: Tvrtko Ursulin Cc: "Thomas Hellström" Cc: Anshuman Gupta Cc: Arunpravin Paneer Selvam Cc: dri-devel@lists.freedesktop.org Cc: # v5.19+ Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230307144621.10748-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index e7147e304637..b84f74807ca1 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -158,7 +158,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo = res->bo; uint32_t num_pages; - if (!bo) + if (!bo || bo->resource != res) continue; num_pages = PFN_UP(bo->base.size); -- cgit From 3b80a03d455143cc9135dac86722bbdd079daff3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 24 Feb 2023 12:03:05 +0200 Subject: docs/mm: Physical Memory: fix a reference to a file that doesn't exist kbuild reports: >> Warning: Documentation/mm/physical_memory.rst references a file that doesn't exist: Documentation/admin-guide/mm/memory_hotplug.rst Fix the filename to be 'Documentation/admin-guide/mm/memory-hotplug.rst'. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202302231311.567PAoS2-lkp@intel.com/ Fixes: 353c7dd636ed ("docs/mm: Physical Memory: remove useless markup") Signed-off-by: Mike Rapoport (IBM) Link: https://lore.kernel.org/r/20230224100306.2287696-1-rppt@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/mm/physical_memory.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physical_memory.rst index f9d7ea4b9dca..1bc888d36ea1 100644 --- a/Documentation/mm/physical_memory.rst +++ b/Documentation/mm/physical_memory.rst @@ -66,7 +66,7 @@ one of the types described below. also populated on boot using one of ``kernelcore``, ``movablecore`` and ``movable_node`` kernel command line parameters. See Documentation/mm/page_migration.rst and - Documentation/admin-guide/mm/memory_hotplug.rst for additional details. + Documentation/admin-guide/mm/memory-hotplug.rst for additional details. * ``ZONE_DEVICE`` represents memory residing on devices such as PMEM and GPU. It has different characteristics than RAM zone types and it exists to provide -- cgit From 87eae260995577d203a70d72d46976521a5687e1 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 24 Feb 2023 12:03:06 +0200 Subject: docs/mm: hugetlbfs_reserv: fix a reference to a file that doesn't exist kbuild reports: >> Warning: Documentation/mm/hugetlbfs_reserv.rst references a file that doesn't exist: Documentation/mm/hugetlbpage.rst >> Warning: Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst references a file that doesn't exist: Documentation/mm/hugetlbpage.rst Fix the filename to be 'Documentation/admin-guide/mm/hugetlbpage.rst'. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202302231854.sKlCmx9K-lkp@intel.com/ Fixes: ee86588960e2 ("docs/mm: remove useless markup") Signed-off-by: Mike Rapoport (IBM) Link: https://lore.kernel.org/r/20230224100306.2287696-2-rppt@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/mm/hugetlbfs_reserv.rst | 8 ++++---- Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/mm/hugetlbfs_reserv.rst b/Documentation/mm/hugetlbfs_reserv.rst index 3d05d64de9b4..d9c2b0f01dcd 100644 --- a/Documentation/mm/hugetlbfs_reserv.rst +++ b/Documentation/mm/hugetlbfs_reserv.rst @@ -5,10 +5,10 @@ Hugetlbfs Reservation Overview ======== -Huge pages as described at Documentation/mm/hugetlbpage.rst are typically -preallocated for application use. These huge pages are instantiated in a -task's address space at page fault time if the VMA indicates huge pages are -to be used. If no huge page exists at page fault time, the task is sent +Huge pages as described at Documentation/admin-guide/mm/hugetlbpage.rst are +typically preallocated for application use. These huge pages are instantiated +in a task's address space at page fault time if the VMA indicates huge pages +are to be used. If no huge page exists at page fault time, the task is sent a SIGBUS and often dies an unhappy death. Shortly after huge page support was added, it was determined that it would be better to detect a shortage of huge pages at mmap() time. The idea is that if there were not enough diff --git a/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst b/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst index c1fa35315d8b..b7a0544224ad 100644 --- a/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst +++ b/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst @@ -15,7 +15,8 @@ Hugetlbfs 预留 概述 ==== -Documentation/mm/hugetlbpage.rst 中描述的巨页通常是预先分配给应用程序使用的。如果VMA指 +Documentation/admin-guide/mm/hugetlbpage.rst +中描述的巨页通常是预先分配给应用程序使用的 。如果VMA指 示要使用巨页,这些巨页会在缺页异常时被实例化到任务的地址空间。如果在缺页异常 时没有巨页存在,任务就会被发送一个SIGBUS,并经常不高兴地死去。在加入巨页支 持后不久,人们决定,在mmap()时检测巨页的短缺情况会更好。这个想法是,如果 -- cgit From a5904f415e1af72fa8fe6665aa4f554dc2099a95 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:29 +0100 Subject: interconnect: fix mem leak when freeing nodes The node link array is allocated when adding links to a node but is not deallocated when nodes are destroyed. Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API") Cc: stable@vger.kernel.org # 5.1 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Tested-by: Luca Ceresoli # i.MX8MP MSC SM2-MB-EP1 Board Link: https://lore.kernel.org/r/20230306075651.2449-2-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 0f392f59b135..5217f449eeec 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -850,6 +850,10 @@ void icc_node_destroy(int id) mutex_unlock(&icc_lock); + if (!node) + return; + + kfree(node->links); kfree(node); } EXPORT_SYMBOL_GPL(icc_node_destroy); -- cgit From e0e7089bf9a87bc5e3997422e4e24563424f9018 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:30 +0100 Subject: interconnect: fix icc_provider_del() error handling The interconnect framework currently expects that providers are only removed when there are no users and after all nodes have been removed. There is currently nothing that guarantees this to be the case and the framework does not do any reference counting, but refusing to remove the provider is never correct as that would leave a dangling pointer to a resource that is about to be released in the global provider list (e.g. accessible through debugfs). Replace the current sanity checks with WARN_ON() so that the provider is always removed. Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API") Cc: stable@vger.kernel.org # 5.1: 680f8666baf6: interconnect: Make icc_provider_del() return void Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Tested-by: Luca Ceresoli # i.MX8MP MSC SM2-MB-EP1 Board Link: https://lore.kernel.org/r/20230306075651.2449-3-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 5217f449eeec..cabb6f5df83e 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1065,18 +1065,8 @@ EXPORT_SYMBOL_GPL(icc_provider_add); void icc_provider_del(struct icc_provider *provider) { mutex_lock(&icc_lock); - if (provider->users) { - pr_warn("interconnect provider still has %d users\n", - provider->users); - mutex_unlock(&icc_lock); - return; - } - - if (!list_empty(&provider->nodes)) { - pr_warn("interconnect provider still has nodes\n"); - mutex_unlock(&icc_lock); - return; - } + WARN_ON(provider->users); + WARN_ON(!list_empty(&provider->nodes)); list_del(&provider->provider_list); mutex_unlock(&icc_lock); -- cgit From eb59eca0d8ac15f8c1b7f1cd35999455a90292c0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:31 +0100 Subject: interconnect: fix provider registration API The current interconnect provider interface is inherently racy as providers are expected to be added before being fully initialised. Specifically, nodes are currently not added and the provider data is not initialised until after registering the provider which can cause racing DT lookups to fail. Add a new provider API which will be used to fix up the interconnect drivers. The old API is reimplemented using the new interface and will be removed once all drivers have been fixed. Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API") Fixes: 87e3031b6fbd ("interconnect: Allow endpoints translation via DT") Cc: stable@vger.kernel.org # 5.1 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Tested-by: Luca Ceresoli # i.MX8MP MSC SM2-MB-EP1 Board Link: https://lore.kernel.org/r/20230306075651.2449-4-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 52 +++++++++++++++++++++++++---------- include/linux/interconnect-provider.h | 12 ++++++++ 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index cabb6f5df83e..7a24c1444ace 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -1033,44 +1033,68 @@ int icc_nodes_remove(struct icc_provider *provider) EXPORT_SYMBOL_GPL(icc_nodes_remove); /** - * icc_provider_add() - add a new interconnect provider - * @provider: the interconnect provider that will be added into topology + * icc_provider_init() - initialize a new interconnect provider + * @provider: the interconnect provider to initialize + * + * Must be called before adding nodes to the provider. + */ +void icc_provider_init(struct icc_provider *provider) +{ + WARN_ON(!provider->set); + + INIT_LIST_HEAD(&provider->nodes); +} +EXPORT_SYMBOL_GPL(icc_provider_init); + +/** + * icc_provider_register() - register a new interconnect provider + * @provider: the interconnect provider to register * * Return: 0 on success, or an error code otherwise */ -int icc_provider_add(struct icc_provider *provider) +int icc_provider_register(struct icc_provider *provider) { - if (WARN_ON(!provider->set)) - return -EINVAL; if (WARN_ON(!provider->xlate && !provider->xlate_extended)) return -EINVAL; mutex_lock(&icc_lock); - - INIT_LIST_HEAD(&provider->nodes); list_add_tail(&provider->provider_list, &icc_providers); - mutex_unlock(&icc_lock); - dev_dbg(provider->dev, "interconnect provider added to topology\n"); + dev_dbg(provider->dev, "interconnect provider registered\n"); return 0; } -EXPORT_SYMBOL_GPL(icc_provider_add); +EXPORT_SYMBOL_GPL(icc_provider_register); /** - * icc_provider_del() - delete previously added interconnect provider - * @provider: the interconnect provider that will be removed from topology + * icc_provider_deregister() - deregister an interconnect provider + * @provider: the interconnect provider to deregister */ -void icc_provider_del(struct icc_provider *provider) +void icc_provider_deregister(struct icc_provider *provider) { mutex_lock(&icc_lock); WARN_ON(provider->users); - WARN_ON(!list_empty(&provider->nodes)); list_del(&provider->provider_list); mutex_unlock(&icc_lock); } +EXPORT_SYMBOL_GPL(icc_provider_deregister); + +int icc_provider_add(struct icc_provider *provider) +{ + icc_provider_init(provider); + + return icc_provider_register(provider); +} +EXPORT_SYMBOL_GPL(icc_provider_add); + +void icc_provider_del(struct icc_provider *provider) +{ + WARN_ON(!list_empty(&provider->nodes)); + + icc_provider_deregister(provider); +} EXPORT_SYMBOL_GPL(icc_provider_del); static const struct of_device_id __maybe_unused ignore_list[] = { diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index cd5c5a27557f..d12cd18aab3f 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -122,6 +122,9 @@ int icc_link_destroy(struct icc_node *src, struct icc_node *dst); void icc_node_add(struct icc_node *node, struct icc_provider *provider); void icc_node_del(struct icc_node *node); int icc_nodes_remove(struct icc_provider *provider); +void icc_provider_init(struct icc_provider *provider); +int icc_provider_register(struct icc_provider *provider); +void icc_provider_deregister(struct icc_provider *provider); int icc_provider_add(struct icc_provider *provider); void icc_provider_del(struct icc_provider *provider); struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec); @@ -167,6 +170,15 @@ static inline int icc_nodes_remove(struct icc_provider *provider) return -ENOTSUPP; } +static inline void icc_provider_init(struct icc_provider *provider) { } + +static inline int icc_provider_register(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +static inline void icc_provider_deregister(struct icc_provider *provider) { } + static inline int icc_provider_add(struct icc_provider *provider) { return -ENOTSUPP; -- cgit From 9fbd35520f1f7f3cbe1873939a27ad9b009f21f9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:32 +0100 Subject: interconnect: imx: fix registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: f0d8048525d7 ("interconnect: Add imx core driver") Cc: stable@vger.kernel.org # 5.8 Cc: Alexandre Bailon Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Tested-by: Luca Ceresoli # i.MX8MP MSC SM2-MB-EP1 Board Link: https://lore.kernel.org/r/20230306075651.2449-5-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index 823d9be9771a..979ed610f704 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -295,6 +295,9 @@ int imx_icc_register(struct platform_device *pdev, provider->xlate = of_icc_xlate_onecell; provider->data = data; provider->dev = dev->parent; + + icc_provider_init(provider); + platform_set_drvdata(pdev, imx_provider); if (settings) { @@ -306,20 +309,18 @@ int imx_icc_register(struct platform_device *pdev, } } - ret = icc_provider_add(provider); - if (ret) { - dev_err(dev, "error adding interconnect provider: %d\n", ret); + ret = imx_icc_register_nodes(imx_provider, nodes, nodes_count, settings); + if (ret) return ret; - } - ret = imx_icc_register_nodes(imx_provider, nodes, nodes_count, settings); + ret = icc_provider_register(provider); if (ret) - goto provider_del; + goto err_unregister_nodes; return 0; -provider_del: - icc_provider_del(provider); +err_unregister_nodes: + imx_icc_unregister_nodes(&imx_provider->provider); return ret; } EXPORT_SYMBOL_GPL(imx_icc_register); @@ -328,9 +329,8 @@ void imx_icc_unregister(struct platform_device *pdev) { struct imx_icc_provider *imx_provider = platform_get_drvdata(pdev); + icc_provider_deregister(&imx_provider->provider); imx_icc_unregister_nodes(&imx_provider->provider); - - icc_provider_del(&imx_provider->provider); } EXPORT_SYMBOL_GPL(imx_icc_unregister); -- cgit From 174941ed28a3573db075da46d95b4dcf9d4c49c2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:33 +0100 Subject: interconnect: qcom: osm-l3: fix registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail: of_icc_xlate_onecell: invalid index 0 cpu cpu0: error -EINVAL: error finding src node cpu cpu0: dev_pm_opp_of_find_icc_paths: Unable to get path0: -22 qcom-cpufreq-hw: probe of 18591000.cpufreq failed with error -22 Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: 5bc9900addaf ("interconnect: qcom: Add OSM L3 interconnect provider support") Cc: stable@vger.kernel.org # 5.7 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-6-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/osm-l3.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index 1bc01ff6e02a..1bafb54f1432 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -158,8 +158,8 @@ static int qcom_osm_l3_remove(struct platform_device *pdev) { struct qcom_osm_l3_icc_provider *qp = platform_get_drvdata(pdev); + icc_provider_deregister(&qp->provider); icc_nodes_remove(&qp->provider); - icc_provider_del(&qp->provider); return 0; } @@ -245,14 +245,9 @@ static int qcom_osm_l3_probe(struct platform_device *pdev) provider->set = qcom_osm_l3_set; provider->aggregate = icc_std_aggregate; provider->xlate = of_icc_xlate_onecell; - INIT_LIST_HEAD(&provider->nodes); provider->data = data; - ret = icc_provider_add(provider); - if (ret) { - dev_err(&pdev->dev, "error adding interconnect provider\n"); - return ret; - } + icc_provider_init(provider); for (i = 0; i < num_nodes; i++) { size_t j; @@ -275,12 +270,15 @@ static int qcom_osm_l3_probe(struct platform_device *pdev) } data->num_nodes = num_nodes; + ret = icc_provider_register(provider); + if (ret) + goto err; + platform_set_drvdata(pdev, qp); return 0; err: icc_nodes_remove(provider); - icc_provider_del(provider); return ret; } -- cgit From bc463201f60803fa6bf2741d59441031cd0910e4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:34 +0100 Subject: interconnect: qcom: rpm: fix probe child-node error handling Make sure to clean up and release resources properly also in case probe fails when populating child devices. Fixes: e39bf2972c6e ("interconnect: icc-rpm: Support child NoC device probe") Cc: stable@vger.kernel.org # 5.17 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-7-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c index df3196f72536..91778cfcbc65 100644 --- a/drivers/interconnect/qcom/icc-rpm.c +++ b/drivers/interconnect/qcom/icc-rpm.c @@ -541,8 +541,11 @@ regmap_done: platform_set_drvdata(pdev, qp); /* Populate child NoC devices if any */ - if (of_get_child_count(dev->of_node) > 0) - return of_platform_populate(dev->of_node, NULL, NULL, dev); + if (of_get_child_count(dev->of_node) > 0) { + ret = of_platform_populate(dev->of_node, NULL, NULL, dev); + if (ret) + goto err; + } return 0; err: -- cgit From 63cf584203f3367c8b073d417c8e5cbbfc450506 Mon Sep 17 00:00:00 2001 From: James Houghton Date: Thu, 2 Mar 2023 22:24:04 +0000 Subject: mm: teach mincore_hugetlb about pte markers By checking huge_pte_none(), we incorrectly classify PTE markers as "present". Instead, check huge_pte_none_mostly(), classifying PTE markers the same as if the PTE were completely blank. PTE markers, unlike other kinds of swap entries, don't reference any physical page and don't indicate that a physical page was mapped previously. As such, treat them as non-present for the sake of mincore(). Link: https://lkml.kernel.org/r/20230302222404.175303-1-jthoughton@google.com Fixes: 5c041f5d1f23 ("mm: teach core mm about pte markers") Signed-off-by: James Houghton Acked-by: Peter Xu Acked-by: David Hildenbrand Cc: Axel Rasmussen Cc: James Houghton Cc: Signed-off-by: Andrew Morton --- mm/mincore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mincore.c b/mm/mincore.c index cd69b9db0081..d359650b0f75 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -33,7 +33,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, * Hugepages under user process are always in RAM and never * swapped out, but theoretically it needs to be checked. */ - present = pte && !huge_pte_none(huge_ptep_get(pte)); + present = pte && !huge_pte_none_mostly(huge_ptep_get(pte)); for (; addr != end; vec++, addr += PAGE_SIZE) *vec = present; walk->private = vec; -- cgit From 42b2af2c9b7eede8ef21d0943f84d135e21a32a3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 2 Mar 2023 18:54:23 +0100 Subject: mm/userfaultfd: propagate uffd-wp bit when PTE-mapping the huge zeropage Currently, we'd lose the userfaultfd-wp marker when PTE-mapping a huge zeropage, resulting in the next write faults in the PMD range not triggering uffd-wp events. Various actions (partial MADV_DONTNEED, partial mremap, partial munmap, partial mprotect) could trigger this. However, most importantly, un-protecting a single sub-page from the userfaultfd-wp handler when processing a uffd-wp event will PTE-map the shared huge zeropage and lose the uffd-wp bit for the remainder of the PMD. Let's properly propagate the uffd-wp bit to the PMDs. #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include static size_t pagesize; static int uffd; static volatile bool uffd_triggered; #define barrier() __asm__ __volatile__("": : :"memory") static void uffd_wp_range(char *start, size_t size, bool wp) { struct uffdio_writeprotect uffd_writeprotect; uffd_writeprotect.range.start = (unsigned long) start; uffd_writeprotect.range.len = size; if (wp) { uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP; } else { uffd_writeprotect.mode = 0; } if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) { fprintf(stderr, "UFFDIO_WRITEPROTECT failed: %d\n", errno); exit(1); } } static void *uffd_thread_fn(void *arg) { static struct uffd_msg msg; ssize_t nread; while (1) { struct pollfd pollfd; int nready; pollfd.fd = uffd; pollfd.events = POLLIN; nready = poll(&pollfd, 1, -1); if (nready == -1) { fprintf(stderr, "poll() failed: %d\n", errno); exit(1); } nread = read(uffd, &msg, sizeof(msg)); if (nread <= 0) continue; if (msg.event != UFFD_EVENT_PAGEFAULT || !(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP)) { printf("FAIL: wrong uffd-wp event fired\n"); exit(1); } /* un-protect the single page. */ uffd_triggered = true; uffd_wp_range((char *)(uintptr_t)msg.arg.pagefault.address, pagesize, false); } return arg; } static int setup_uffd(char *map, size_t size) { struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; pthread_t thread; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); if (uffd < 0) { fprintf(stderr, "syscall() failed: %d\n", errno); return -errno; } uffdio_api.api = UFFD_API; uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { fprintf(stderr, "UFFDIO_API failed: %d\n", errno); return -errno; } if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { fprintf(stderr, "UFFD_FEATURE_WRITEPROTECT missing\n"); return -ENOSYS; } uffdio_register.range.start = (unsigned long) map; uffdio_register.range.len = size; uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) { fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", errno); return -errno; } pthread_create(&thread, NULL, uffd_thread_fn, NULL); return 0; } int main(void) { const size_t size = 4 * 1024 * 1024ull; char *map, *cur; pagesize = getpagesize(); map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) { fprintf(stderr, "mmap() failed\n"); return -errno; } if (madvise(map, size, MADV_HUGEPAGE)) { fprintf(stderr, "MADV_HUGEPAGE failed\n"); return -errno; } if (setup_uffd(map, size)) return 1; /* Read the whole range, populating zeropages. */ madvise(map, size, MADV_POPULATE_READ); /* Write-protect the whole range. */ uffd_wp_range(map, size, true); /* Make sure uffd-wp triggers on each page. */ for (cur = map; cur < map + size; cur += pagesize) { uffd_triggered = false; barrier(); /* Trigger a write fault. */ *cur = 1; barrier(); if (!uffd_triggered) { printf("FAIL: uffd-wp did not trigger\n"); return 1; } } printf("PASS: uffd-wp triggered\n"); return 0; } Link: https://lkml.kernel.org/r/20230302175423.589164-1-david@redhat.com Fixes: e06f1e1dd499 ("userfaultfd: wp: enabled write protection in userfaultfd API") Signed-off-by: David Hildenbrand Acked-by: Peter Xu Cc: Mike Rapoport Cc: Andrea Arcangeli Cc: Jerome Glisse Cc: Shaohua Li Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4fc43859e59a..032fb0ef9cd1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2037,7 +2037,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; pgtable_t pgtable; - pmd_t _pmd; + pmd_t _pmd, old_pmd; int i; /* @@ -2048,7 +2048,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, * * See Documentation/mm/mmu_notifier.rst */ - pmdp_huge_clear_flush(vma, haddr, pmd); + old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); @@ -2057,6 +2057,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, pte_t *pte, entry; entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot); entry = pte_mkspecial(entry); + if (pmd_uffd_wp(old_pmd)) + entry = pte_mkuffd_wp(entry); pte = pte_offset_map(&_pmd, haddr); VM_BUG_ON(!pte_none(*pte)); set_pte_at(mm, haddr, pte, entry); -- cgit From af665b40dfa2b49f1e3e11ecd1096506ef40348d Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Thu, 2 Mar 2023 01:54:42 +0200 Subject: mailmap: updates for Jarkko Sakkinen Update to my current employer: https://research.tuni.fi/nisec/ Link: https://lkml.kernel.org/r/20230301235443.6663-1-jarkko@kernel.org Signed-off-by: Jarkko Sakkinen Cc: Arnd Bergmann Cc: Baolin Wang Cc: Ben Widawsky Cc: Bjorn Andersson Cc: Colin Ian King Cc: Kirill Tkhai Cc: Qais Yousef Cc: Vasily Averin Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 5367faaf7831..7581773e1e16 100644 --- a/.mailmap +++ b/.mailmap @@ -191,6 +191,7 @@ Jan Glauber Jan Glauber Jarkko Sakkinen Jarkko Sakkinen +Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe -- cgit From 071ca76d2c1dddc6bf2f4917a016207e00bcc8e3 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 1 Mar 2023 12:00:12 +0100 Subject: mailmap: correct Dikshita Agarwal's Qualcomm email address I recently sent a patch to map Dikshita's old CAF address to his current one @ Qualcomm. It turned out however, that he has two of them, with the @quicinc.com one meant for upstream contributions. Fix it. Link: https://lkml.kernel.org/r/20230301110012.1290379-1-konrad.dybcio@linaro.org Signed-off-by: Konrad Dybcio Cc: Dikshita Agarwal Cc: Andy Gross Cc: Arnd Bergmann Cc: Baolin Wang Cc: Bjorn Andersson Cc: Colin Ian King Cc: Kirill Tkhai Cc: Marijn Suijten Cc: Qais Yousef Cc: Vasily Averin Signed-off-by: Andrew Morton --- .mailmap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 7581773e1e16..16b195032269 100644 --- a/.mailmap +++ b/.mailmap @@ -121,7 +121,7 @@ Dengcheng Zhu Dengcheng Zhu Dengcheng Zhu -Dikshita Agarwal +Dikshita Agarwal Dmitry Baryshkov Dmitry Baryshkov <[dbaryshkov@gmail.com]> Dmitry Baryshkov -- cgit From 89a004508081e1d1a498f10ea6d4f7f97a820438 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 1 Mar 2023 10:01:32 +0100 Subject: .mailmap: add Alexandre Ghiti personal email address I'm no longer employed by Canonical which results in email bouncing so add an entry to my personal email address. Link: https://lkml.kernel.org/r/20230301090132.280475-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Reported-by: Conor Dooley Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 16b195032269..c1def9105d0c 100644 --- a/.mailmap +++ b/.mailmap @@ -28,6 +28,7 @@ Alexander Lobakin Alexander Mikhalitsyn Alexander Mikhalitsyn Alexandre Belloni +Alexandre Ghiti Alexei Starovoitov Alexei Starovoitov Alexei Starovoitov -- cgit From fb3592c41a4427601f9643b2a84e55bb99f5cd7c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 3 Mar 2023 11:01:53 +0800 Subject: migrate_pages: fix deadlock in batched migration Patch series "migrate_pages: fix deadlock in batched synchronous migration", v2. Two deadlock bugs were reported for the migrate_pages() batching series. Thanks Hugh and Pengfei. Analysis shows that if we have locked some other folios except the one we are migrating, it's not safe in general to wait synchronously, for example, to wait the writeback to complete or wait to lock the buffer head. So 1/3 fixes the deadlock in a simple way, where the batching support for the synchronous migration is disabled. The change is straightforward and easy to be understood. While 3/3 re-introduce the batching for synchronous migration via trying to migrate asynchronously in batch optimistically, then fall back to migrate synchronously one by one for fail-to-migrate folios. Test shows that this can restore the TLB flushing batching performance for synchronous migration effectively. This patch (of 3): Two deadlock bugs were reported for the migrate_pages() batching series. Thanks Hugh and Pengfei! For example, in the following deadlock trace snippet, INFO: task kworker/u4:0:9 blocked for more than 147 seconds. Not tainted 6.2.0-rc4-kvm+ #1314 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u4:0 state:D stack:0 pid:9 ppid:2 flags:0x00004000 Workqueue: loop4 loop_rootcg_workfn Call Trace: __schedule+0x43b/0xd00 schedule+0x6a/0xf0 io_schedule+0x4a/0x80 folio_wait_bit_common+0x1b5/0x4e0 ? __pfx_wake_page_function+0x10/0x10 __filemap_get_folio+0x73d/0x770 shmem_get_folio_gfp+0x1fd/0xc80 shmem_write_begin+0x91/0x220 generic_perform_write+0x10e/0x2e0 __generic_file_write_iter+0x17e/0x290 ? generic_write_checks+0x12b/0x1a0 generic_file_write_iter+0x97/0x180 ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 do_iter_readv_writev+0x13c/0x210 ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20 do_iter_write+0xf6/0x330 vfs_iter_write+0x46/0x70 loop_process_work+0x723/0xfe0 loop_rootcg_workfn+0x28/0x40 process_one_work+0x3cc/0x8d0 worker_thread+0x66/0x630 ? __pfx_worker_thread+0x10/0x10 kthread+0x153/0x190 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x50 INFO: task repro:1023 blocked for more than 147 seconds. Not tainted 6.2.0-rc4-kvm+ #1314 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:repro state:D stack:0 pid:1023 ppid:360 flags:0x00004004 Call Trace: __schedule+0x43b/0xd00 schedule+0x6a/0xf0 io_schedule+0x4a/0x80 folio_wait_bit_common+0x1b5/0x4e0 ? compaction_alloc+0x77/0x1150 ? __pfx_wake_page_function+0x10/0x10 folio_wait_bit+0x30/0x40 folio_wait_writeback+0x2e/0x1e0 migrate_pages_batch+0x555/0x1ac0 ? __pfx_compaction_alloc+0x10/0x10 ? __pfx_compaction_free+0x10/0x10 ? __this_cpu_preempt_check+0x17/0x20 ? lock_is_held_type+0xe6/0x140 migrate_pages+0x100e/0x1180 ? __pfx_compaction_free+0x10/0x10 ? __pfx_compaction_alloc+0x10/0x10 compact_zone+0xe10/0x1b50 ? lock_is_held_type+0xe6/0x140 ? check_preemption_disabled+0x80/0xf0 compact_node+0xa3/0x100 ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30 ? _find_first_bit+0x7b/0x90 sysctl_compaction_handler+0x5d/0xb0 proc_sys_call_handler+0x29d/0x420 proc_sys_write+0x2b/0x40 vfs_write+0x3a3/0x780 ksys_write+0xb7/0x180 __x64_sys_write+0x26/0x30 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f3a2471f59d RSP: 002b:00007ffe567f7288 EFLAGS: 00000217 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a2471f59d RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005 RBP: 00007ffe567f72a0 R08: 0000000000000010 R09: 0000000000000010 R10: 0000000000000010 R11: 0000000000000217 R12: 00000000004012e0 R13: 00007ffe567f73e0 R14: 0000000000000000 R15: 0000000000000000 The page migration task has held the lock of the shmem folio A, and is waiting the writeback of the folio B of the file system on the loop block device to complete. While the loop worker task which writes back the folio B is waiting to lock the shmem folio A, because the folio A backs the folio B in the loop device. Thus deadlock is triggered. In general, if we have locked some other folios except the one we are migrating, it's not safe to wait synchronously, for example, to wait the writeback to complete or wait to lock the buffer head. To fix the deadlock, in this patch, we avoid to batch the page migration except for MIGRATE_ASYNC mode. In MIGRATE_ASYNC mode, synchronous waiting is avoided. The fix can be improved further. We will do that as soon as possible. Link: https://lkml.kernel.org/r/20230303030155.160983-1-ying.huang@intel.com Link: https://lore.kernel.org/linux-mm/87a6c8c-c5c1-67dc-1e32-eb30831d6e3d@google.com/ Link: https://lore.kernel.org/linux-mm/874jrg7kke.fsf@yhuang6-desk2.ccr.corp.intel.com/ Link: https://lore.kernel.org/linux-mm/20230227110614.dngdub2j3exr6dfp@quack3/ Link: https://lkml.kernel.org/r/20230303030155.160983-2-ying.huang@intel.com Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move") Signed-off-by: "Huang, Ying" Reported-by: Hugh Dickins Reported-by: "Xu, Pengfei" Cc: Jan Kara Cc: Baolin Wang Cc: Christoph Hellwig Cc: Stefan Roesch Cc: Tejun Heo Cc: Xin Hao Cc: Zi Yan Cc: Yang Shi Cc: Matthew Wilcox Cc: Mike Kravetz Signed-off-by: Andrew Morton --- mm/migrate.c | 69 +++++++++++++++++++++++------------------------------------- 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 98f1c11197a8..f348e0a7b1df 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1112,7 +1112,7 @@ static void migrate_folio_done(struct folio *src, /* Obtain the lock on page, remove all ptes. */ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct folio *src, - struct folio **dstp, int force, bool avoid_force_lock, + struct folio **dstp, int force, enum migrate_mode mode, enum migrate_reason reason, struct list_head *ret) { @@ -1163,17 +1163,6 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page if (current->flags & PF_MEMALLOC) goto out; - /* - * We have locked some folios and are going to wait to lock - * this folio. To avoid a potential deadlock, let's bail - * out and not do that. The locked folios will be moved and - * unlocked, then we can wait to lock this folio. - */ - if (avoid_force_lock) { - rc = -EDEADLOCK; - goto out; - } - folio_lock(src); } locked = true; @@ -1253,7 +1242,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page /* Establish migration ptes */ VM_BUG_ON_FOLIO(folio_test_anon(src) && !folio_test_ksm(src) && !anon_vma, src); - try_to_migrate(src, TTU_BATCH_FLUSH); + try_to_migrate(src, mode == MIGRATE_ASYNC ? TTU_BATCH_FLUSH : 0); page_was_mapped = 1; } @@ -1267,7 +1256,7 @@ out: * A folio that has not been unmapped will be restored to * right list unless we want to retry. */ - if (rc == -EAGAIN || rc == -EDEADLOCK) + if (rc == -EAGAIN) ret = NULL; migrate_folio_undo_src(src, page_was_mapped, anon_vma, locked, ret); @@ -1618,6 +1607,11 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page, /* * migrate_pages_batch() first unmaps folios in the from list as many as * possible, then move the unmapped folios. + * + * We only batch migration if mode == MIGRATE_ASYNC to avoid to wait a + * lock or bit when we have locked more than one folio. Which may cause + * deadlock (e.g., for loop device). So, if mode != MIGRATE_ASYNC, the + * length of the from list must be <= 1. */ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, free_page_t put_new_page, unsigned long private, @@ -1640,11 +1634,11 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, LIST_HEAD(dst_folios); bool nosplit = (reason == MR_NUMA_MISPLACED); bool no_split_folio_counting = false; - bool avoid_force_lock; + VM_WARN_ON_ONCE(mode != MIGRATE_ASYNC && + !list_empty(from) && !list_is_singular(from)); retry: rc_saved = 0; - avoid_force_lock = false; retry = 1; for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); @@ -1689,15 +1683,14 @@ retry: } rc = migrate_folio_unmap(get_new_page, put_new_page, private, - folio, &dst, pass > 2, avoid_force_lock, - mode, reason, ret_folios); + folio, &dst, pass > 2, mode, + reason, ret_folios); /* * The rules are: * Success: folio will be freed * Unmap: folio will be put on unmap_folios list, * dst folio put on dst_folios list * -EAGAIN: stay on the from list - * -EDEADLOCK: stay on the from list * -ENOMEM: stay on the from list * Other errno: put on ret_folios list */ @@ -1749,14 +1742,6 @@ retry: goto out; else goto move; - case -EDEADLOCK: - /* - * The folio cannot be locked for potential deadlock. - * Go move (and unlock) all locked folios. Then we can - * try again. - */ - rc_saved = rc; - goto move; case -EAGAIN: if (is_large) { large_retry++; @@ -1771,11 +1756,6 @@ retry: stats->nr_thp_succeeded += is_thp; break; case MIGRATEPAGE_UNMAP: - /* - * We have locked some folios, don't force lock - * to avoid deadlock. - */ - avoid_force_lock = true; list_move_tail(&folio->lru, &unmap_folios); list_add_tail(&dst->lru, &dst_folios); break; @@ -1900,17 +1880,15 @@ out: */ list_splice_init(from, ret_folios); list_splice_init(&split_folios, from); + /* + * Force async mode to avoid to wait lock or bit when we have + * locked more than one folios. + */ + mode = MIGRATE_ASYNC; no_split_folio_counting = true; goto retry; } - /* - * We have unlocked all locked folios, so we can force lock now, let's - * try again. - */ - if (rc == -EDEADLOCK) - goto retry; - return rc; } @@ -1945,7 +1923,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { int rc, rc_gather; - int nr_pages; + int nr_pages, batch; struct folio *folio, *folio2; LIST_HEAD(folios); LIST_HEAD(ret_folios); @@ -1959,6 +1937,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, mode, reason, &stats, &ret_folios); if (rc_gather < 0) goto out; + + if (mode == MIGRATE_ASYNC) + batch = NR_MAX_BATCHED_MIGRATION; + else + batch = 1; again: nr_pages = 0; list_for_each_entry_safe(folio, folio2, from, lru) { @@ -1969,11 +1952,11 @@ again: } nr_pages += folio_nr_pages(folio); - if (nr_pages > NR_MAX_BATCHED_MIGRATION) + if (nr_pages >= batch) break; } - if (nr_pages > NR_MAX_BATCHED_MIGRATION) - list_cut_before(&folios, from, &folio->lru); + if (nr_pages >= batch) + list_cut_before(&folios, from, &folio2->lru); else list_splice_init(from, &folios); rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private, -- cgit From a21d2133215b58fbf254ea2bb77eb3143ffedf60 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 3 Mar 2023 11:01:54 +0800 Subject: migrate_pages: move split folios processing out of migrate_pages_batch() To simplify the code logic and reduce the line number. Link: https://lkml.kernel.org/r/20230303030155.160983-3-ying.huang@intel.com Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move") Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Cc: Hugh Dickins Cc: "Xu, Pengfei" Cc: Christoph Hellwig Cc: Stefan Roesch Cc: Tejun Heo Cc: Xin Hao Cc: Zi Yan Cc: Yang Shi Cc: Matthew Wilcox Cc: Mike Kravetz Signed-off-by: Andrew Morton --- mm/migrate.c | 78 ++++++++++++++++++++++-------------------------------------- 1 file changed, 28 insertions(+), 50 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index f348e0a7b1df..b61abe529107 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1616,9 +1616,10 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page, static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, free_page_t put_new_page, unsigned long private, enum migrate_mode mode, int reason, struct list_head *ret_folios, - struct migrate_pages_stats *stats) + struct list_head *split_folios, struct migrate_pages_stats *stats, + int nr_pass) { - int retry; + int retry = 1; int large_retry = 1; int thp_retry = 1; int nr_failed = 0; @@ -1628,21 +1629,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, bool is_large = false; bool is_thp = false; struct folio *folio, *folio2, *dst = NULL, *dst2; - int rc, rc_saved, nr_pages; - LIST_HEAD(split_folios); + int rc, rc_saved = 0, nr_pages; LIST_HEAD(unmap_folios); LIST_HEAD(dst_folios); bool nosplit = (reason == MR_NUMA_MISPLACED); - bool no_split_folio_counting = false; VM_WARN_ON_ONCE(mode != MIGRATE_ASYNC && !list_empty(from) && !list_is_singular(from)); -retry: - rc_saved = 0; - retry = 1; - for (pass = 0; - pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); - pass++) { + + for (pass = 0; pass < nr_pass && (retry || large_retry); pass++) { retry = 0; large_retry = 0; thp_retry = 0; @@ -1673,7 +1668,7 @@ retry: if (!thp_migration_supported() && is_thp) { nr_large_failed++; stats->nr_thp_failed++; - if (!try_split_folio(folio, &split_folios)) { + if (!try_split_folio(folio, split_folios)) { stats->nr_thp_split++; continue; } @@ -1705,7 +1700,7 @@ retry: stats->nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (!nosplit) { - int ret = try_split_folio(folio, &split_folios); + int ret = try_split_folio(folio, split_folios); if (!ret) { stats->nr_thp_split += is_thp; @@ -1722,18 +1717,11 @@ retry: break; } } - } else if (!no_split_folio_counting) { + } else { nr_failed++; } stats->nr_failed_pages += nr_pages + nr_retry_pages; - /* - * There might be some split folios of fail-to-migrate large - * folios left in split_folios list. Move them to ret_folios - * list so that they could be put back to the right list by - * the caller otherwise the folio refcnt will be leaked. - */ - list_splice_init(&split_folios, ret_folios); /* nr_failed isn't updated for not used */ nr_large_failed += large_retry; stats->nr_thp_failed += thp_retry; @@ -1746,7 +1734,7 @@ retry: if (is_large) { large_retry++; thp_retry += is_thp; - } else if (!no_split_folio_counting) { + } else { retry++; } nr_retry_pages += nr_pages; @@ -1769,7 +1757,7 @@ retry: if (is_large) { nr_large_failed++; stats->nr_thp_failed += is_thp; - } else if (!no_split_folio_counting) { + } else { nr_failed++; } @@ -1787,9 +1775,7 @@ move: try_to_unmap_flush(); retry = 1; - for (pass = 0; - pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); - pass++) { + for (pass = 0; pass < nr_pass && (retry || large_retry); pass++) { retry = 0; large_retry = 0; thp_retry = 0; @@ -1818,7 +1804,7 @@ move: if (is_large) { large_retry++; thp_retry += is_thp; - } else if (!no_split_folio_counting) { + } else { retry++; } nr_retry_pages += nr_pages; @@ -1831,7 +1817,7 @@ move: if (is_large) { nr_large_failed++; stats->nr_thp_failed += is_thp; - } else if (!no_split_folio_counting) { + } else { nr_failed++; } @@ -1868,27 +1854,6 @@ out: dst2 = list_next_entry(dst, lru); } - /* - * Try to migrate split folios of fail-to-migrate large folios, no - * nr_failed counting in this round, since all split folios of a - * large folio is counted as 1 failure in the first round. - */ - if (rc >= 0 && !list_empty(&split_folios)) { - /* - * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY - * retries) to ret_folios to avoid migrating them again. - */ - list_splice_init(from, ret_folios); - list_splice_init(&split_folios, from); - /* - * Force async mode to avoid to wait lock or bit when we have - * locked more than one folios. - */ - mode = MIGRATE_ASYNC; - no_split_folio_counting = true; - goto retry; - } - return rc; } @@ -1927,6 +1892,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, struct folio *folio, *folio2; LIST_HEAD(folios); LIST_HEAD(ret_folios); + LIST_HEAD(split_folios); struct migrate_pages_stats stats; trace_mm_migrate_pages_start(mode, reason); @@ -1960,12 +1926,24 @@ again: else list_splice_init(from, &folios); rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private, - mode, reason, &ret_folios, &stats); + mode, reason, &ret_folios, &split_folios, &stats, + NR_MAX_MIGRATE_PAGES_RETRY); list_splice_tail_init(&folios, &ret_folios); if (rc < 0) { rc_gather = rc; + list_splice_tail(&split_folios, &ret_folios); goto out; } + if (!list_empty(&split_folios)) { + /* + * Failure isn't counted since all split folios of a large folio + * is counted as 1 failure already. And, we only try to migrate + * with minimal effort, force MIGRATE_ASYNC mode and retry once. + */ + migrate_pages_batch(&split_folios, get_new_page, put_new_page, private, + MIGRATE_ASYNC, reason, &ret_folios, NULL, &stats, 1); + list_splice_tail_init(&split_folios, &ret_folios); + } rc_gather += rc; if (!list_empty(from)) goto again; -- cgit From 2ef7dbb269902bde34c82f027806992195d1d1ee Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 3 Mar 2023 11:01:55 +0800 Subject: migrate_pages: try migrate in batch asynchronously firstly When we have locked more than one folios, we cannot wait the lock or bit (e.g., page lock, buffer head lock, writeback bit) synchronously. Otherwise deadlock may be triggered. This make it hard to batch the synchronous migration directly. This patch re-enables batching synchronous migration via trying to migrate in batch asynchronously firstly. And any folios that are failed to be migrated asynchronously will be migrated synchronously one by one. Test shows that this can restore the TLB flushing batching performance for synchronous migration effectively. Link: https://lkml.kernel.org/r/20230303030155.160983-4-ying.huang@intel.com Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move") Signed-off-by: "Huang, Ying" Tested-by: Hugh Dickins Reviewed-by: Baolin Wang Cc: "Xu, Pengfei" Cc: Christoph Hellwig Cc: Stefan Roesch Cc: Tejun Heo Cc: Xin Hao Cc: Zi Yan Cc: Yang Shi Cc: Matthew Wilcox Cc: Mike Kravetz Signed-off-by: Andrew Morton --- mm/migrate.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 18 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index b61abe529107..db3f154446af 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1112,9 +1112,8 @@ static void migrate_folio_done(struct folio *src, /* Obtain the lock on page, remove all ptes. */ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct folio *src, - struct folio **dstp, int force, - enum migrate_mode mode, enum migrate_reason reason, - struct list_head *ret) + struct folio **dstp, enum migrate_mode mode, + enum migrate_reason reason, struct list_head *ret) { struct folio *dst; int rc = -EAGAIN; @@ -1144,7 +1143,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page dst->private = NULL; if (!folio_trylock(src)) { - if (!force || mode == MIGRATE_ASYNC) + if (mode == MIGRATE_ASYNC) goto out; /* @@ -1182,8 +1181,6 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page rc = -EBUSY; goto out; } - if (!force) - goto out; folio_wait_writeback(src); } @@ -1497,6 +1494,9 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f #define NR_MAX_BATCHED_MIGRATION 512 #endif #define NR_MAX_MIGRATE_PAGES_RETRY 10 +#define NR_MAX_MIGRATE_ASYNC_RETRY 3 +#define NR_MAX_MIGRATE_SYNC_RETRY \ + (NR_MAX_MIGRATE_PAGES_RETRY - NR_MAX_MIGRATE_ASYNC_RETRY) struct migrate_pages_stats { int nr_succeeded; /* Normal and large folios migrated successfully, in @@ -1678,8 +1678,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page, } rc = migrate_folio_unmap(get_new_page, put_new_page, private, - folio, &dst, pass > 2, mode, - reason, ret_folios); + folio, &dst, mode, reason, ret_folios); /* * The rules are: * Success: folio will be freed @@ -1857,6 +1856,51 @@ out: return rc; } +static int migrate_pages_sync(struct list_head *from, new_page_t get_new_page, + free_page_t put_new_page, unsigned long private, + enum migrate_mode mode, int reason, struct list_head *ret_folios, + struct list_head *split_folios, struct migrate_pages_stats *stats) +{ + int rc, nr_failed = 0; + LIST_HEAD(folios); + struct migrate_pages_stats astats; + + memset(&astats, 0, sizeof(astats)); + /* Try to migrate in batch with MIGRATE_ASYNC mode firstly */ + rc = migrate_pages_batch(from, get_new_page, put_new_page, private, MIGRATE_ASYNC, + reason, &folios, split_folios, &astats, + NR_MAX_MIGRATE_ASYNC_RETRY); + stats->nr_succeeded += astats.nr_succeeded; + stats->nr_thp_succeeded += astats.nr_thp_succeeded; + stats->nr_thp_split += astats.nr_thp_split; + if (rc < 0) { + stats->nr_failed_pages += astats.nr_failed_pages; + stats->nr_thp_failed += astats.nr_thp_failed; + list_splice_tail(&folios, ret_folios); + return rc; + } + stats->nr_thp_failed += astats.nr_thp_split; + nr_failed += astats.nr_thp_split; + /* + * Fall back to migrate all failed folios one by one synchronously. All + * failed folios except split THPs will be retried, so their failure + * isn't counted + */ + list_splice_tail_init(&folios, from); + while (!list_empty(from)) { + list_move(from->next, &folios); + rc = migrate_pages_batch(&folios, get_new_page, put_new_page, + private, mode, reason, ret_folios, + split_folios, stats, NR_MAX_MIGRATE_SYNC_RETRY); + list_splice_tail_init(&folios, ret_folios); + if (rc < 0) + return rc; + nr_failed += rc; + } + + return nr_failed; +} + /* * migrate_pages - migrate the folios specified in a list, to the free folios * supplied as the target for the page migration @@ -1888,7 +1932,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { int rc, rc_gather; - int nr_pages, batch; + int nr_pages; struct folio *folio, *folio2; LIST_HEAD(folios); LIST_HEAD(ret_folios); @@ -1904,10 +1948,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, if (rc_gather < 0) goto out; - if (mode == MIGRATE_ASYNC) - batch = NR_MAX_BATCHED_MIGRATION; - else - batch = 1; again: nr_pages = 0; list_for_each_entry_safe(folio, folio2, from, lru) { @@ -1918,16 +1958,20 @@ again: } nr_pages += folio_nr_pages(folio); - if (nr_pages >= batch) + if (nr_pages >= NR_MAX_BATCHED_MIGRATION) break; } - if (nr_pages >= batch) + if (nr_pages >= NR_MAX_BATCHED_MIGRATION) list_cut_before(&folios, from, &folio2->lru); else list_splice_init(from, &folios); - rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private, - mode, reason, &ret_folios, &split_folios, &stats, - NR_MAX_MIGRATE_PAGES_RETRY); + if (mode == MIGRATE_ASYNC) + rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private, + mode, reason, &ret_folios, &split_folios, &stats, + NR_MAX_MIGRATE_PAGES_RETRY); + else + rc = migrate_pages_sync(&folios, get_new_page, put_new_page, private, + mode, reason, &ret_folios, &split_folios, &stats); list_splice_tail_init(&folios, &ret_folios); if (rc < 0) { rc_gather = rc; -- cgit From 90410bcf873cf05f54a32183afff0161f44f9715 Mon Sep 17 00:00:00 2001 From: Jan Kara via Ocfs2-devel Date: Thu, 2 Mar 2023 16:38:43 +0100 Subject: ocfs2: fix data corruption after failed write When buffered write fails to copy data into underlying page cache page, ocfs2_write_end_nolock() just zeroes out and dirties the page. This can leave dirty page beyond EOF and if page writeback tries to write this page before write succeeds and expands i_size, page gets into inconsistent state where page dirty bit is clear but buffer dirty bits stay set resulting in page data never getting written and so data copied to the page is lost. Fix the problem by invalidating page beyond EOF after failed write. Link: https://lkml.kernel.org/r/20230302153843.18499-1-jack@suse.cz Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()") Signed-off-by: Jan Kara Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/aops.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1d65f6ef00ca..0394505fdce3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1977,11 +1977,26 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (unlikely(copied < len) && wc->w_target_page) { + loff_t new_isize; + if (!PageUptodate(wc->w_target_page)) copied = 0; - ocfs2_zero_new_buffers(wc->w_target_page, start+copied, - start+len); + new_isize = max_t(loff_t, i_size_read(inode), pos + copied); + if (new_isize > page_offset(wc->w_target_page)) + ocfs2_zero_new_buffers(wc->w_target_page, start+copied, + start+len); + else { + /* + * When page is fully beyond new isize (data copy + * failed), do not bother zeroing the page. Invalidate + * it instead so that writeback does not get confused + * put page & buffer dirty bits into inconsistent + * state. + */ + block_invalidate_folio(page_folio(wc->w_target_page), + 0, PAGE_SIZE); + } } if (wc->w_target_page) flush_dcache_page(wc->w_target_page); -- cgit From 751688b8be9049f558f86982966ecaa61a9cbedf Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 4 Mar 2023 19:39:48 +0000 Subject: mm/damon/paddr: fix folio_size() call after folio_put() in damon_pa_young() Patch series "mm/damon/paddr: Fix folio-use-after-put bugs". There are two folio accesses after folio_put() in mm/damon/paddr.c file. Fix those. This patch (of 2): damon_pa_young() is accessing a folio via folio_size() after folio_put() for the folio has invoked. Fix it. Link: https://lkml.kernel.org/r/20230304193949.296391-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230304193949.296391-2-sj@kernel.org Fixes: 397b0c3a584b ("mm/damon/paddr: remove folio_sz field from damon_pa_access_chk_result") Signed-off-by: SeongJae Park Reviewed-by: Kefeng Wang Reviewed-by: Matthew Wilcox (Oracle) Cc: Vishal Moola (Oracle) Cc: [6.2.x] Signed-off-by: Andrew Morton --- mm/damon/paddr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 6c655d9b5639..49b267b03305 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -130,7 +130,6 @@ static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz) accessed = false; else accessed = true; - folio_put(folio); goto out; } @@ -144,10 +143,10 @@ static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz) if (need_lock) folio_unlock(folio); - folio_put(folio); out: *folio_sz = folio_size(folio); + folio_put(folio); return accessed; } -- cgit From dd52a61da0dd8bab8b90e808f0e5ad507b61ad6d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sat, 4 Mar 2023 19:39:49 +0000 Subject: mm/damon/paddr: fix folio_nr_pages() after folio_put() in damon_pa_mark_accessed_or_deactivate() damon_pa_mark_accessed_or_deactivate() is accessing a folio via folio_nr_pages() after folio_put() for the folio has invoked. Fix it. Link: https://lkml.kernel.org/r/20230304193949.296391-3-sj@kernel.org Fixes: f70da5ee8fe1 ("mm/damon: convert damon_pa_mark_accessed_or_deactivate() to use folios") Signed-off-by: SeongJae Park Reviewed-by: Kefeng Wang Reviewed-by: Matthew Wilcox (Oracle) Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- mm/damon/paddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 49b267b03305..dd9c33fbe805 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -280,8 +280,8 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate( folio_mark_accessed(folio); else folio_deactivate(folio); - folio_put(folio); applied += folio_nr_pages(folio); + folio_put(folio); } return applied * PAGE_SIZE; } -- cgit From d3c57724f1569311e4b81e98fad0931028b9bdcd Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Sat, 25 Feb 2023 18:01:36 +0800 Subject: scsi: mpt3sas: Fix NULL pointer access in mpt3sas_transport_port_add() Port is allocated by sas_port_alloc_num() and rphy is allocated by either sas_end_device_alloc() or sas_expander_alloc(), all of which may return NULL. So we need to check the rphy to avoid possible NULL pointer access. If sas_rphy_add() returned with failure, rphy is set to NULL. We would access the rphy in the following lines which would also result NULL pointer access. Fixes: 78316e9dfc24 ("scsi: mpt3sas: Fix possible resource leaks in mpt3sas_transport_port_add()") Signed-off-by: Wenchao Hao Link: https://lore.kernel.org/r/20230225100135.2109330-1-haowenchao2@huawei.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_transport.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index e5ecd6ada6cd..e8a4750f6ec4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -785,7 +785,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, goto out_fail; } port = sas_port_alloc_num(sas_node->parent_dev); - if ((sas_port_add(port))) { + if (!port || (sas_port_add(port))) { ioc_err(ioc, "failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); goto out_fail; @@ -824,6 +824,12 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, mpt3sas_port->remote_identify.sas_address; } + if (!rphy) { + ioc_err(ioc, "failure at %s:%d/%s()!\n", + __FILE__, __LINE__, __func__); + goto out_delete_port; + } + rphy->identify = mpt3sas_port->remote_identify; if ((sas_rphy_add(rphy))) { @@ -831,6 +837,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, __FILE__, __LINE__, __func__); sas_rphy_free(rphy); rphy = NULL; + goto out_delete_port; } if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) { @@ -857,7 +864,10 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle, rphy_to_expander_device(rphy), hba_port->port_id); return mpt3sas_port; - out_fail: +out_delete_port: + sas_port_delete(port); + +out_fail: list_for_each_entry_safe(mpt3sas_phy, next, &mpt3sas_port->phy_list, port_siblings) list_del(&mpt3sas_phy->port_siblings); -- cgit From f305a7b6ca21a665e8d0cf70b5936991a298c93c Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 3 Mar 2023 00:43:31 +0100 Subject: scsi: mpi3mr: Fix throttle_groups memory leak Add a missing kfree(). Fixes: f10af057325c ("scsi: mpi3mr: Resource Based Metering") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230302234336.25456-2-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 29acf6111db3..514ccf29cd7f 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -4389,6 +4389,9 @@ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc) mrioc->pel_seqnum_virt = NULL; } + kfree(mrioc->throttle_groups); + mrioc->throttle_groups = NULL; + kfree(mrioc->logdata_buf); mrioc->logdata_buf = NULL; -- cgit From 7d2b02172b6a2ae6aecd7ef6480b9c4bf3dc59f4 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 3 Mar 2023 00:43:32 +0100 Subject: scsi: mpi3mr: Fix config page DMA memory leak A fix for: DMA-API: pci 0000:83:00.0: device driver has pending DMA allocations while released from device [count=1] Fixes: 32d457d5a2af ("scsi: mpi3mr: Add framework to issue config requests") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230302234336.25456-3-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 514ccf29cd7f..0736e6a59e81 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -4382,7 +4382,11 @@ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc) mrioc->admin_req_base, mrioc->admin_req_dma); mrioc->admin_req_base = NULL; } - + if (mrioc->cfg_page) { + dma_free_coherent(&mrioc->pdev->dev, mrioc->cfg_page_sz, + mrioc->cfg_page, mrioc->cfg_page_dma); + mrioc->cfg_page = NULL; + } if (mrioc->pel_seqnum_virt) { dma_free_coherent(&mrioc->pdev->dev, mrioc->pel_seqnum_sz, mrioc->pel_seqnum_virt, mrioc->pel_seqnum_dma); -- cgit From d0f3c3728da8af76dfe435f7f0cfa2b9d9e43ef0 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 3 Mar 2023 00:43:33 +0100 Subject: scsi: mpi3mr: Fix mpi3mr_hba_port memory leak in mpi3mr_remove() Free mpi3mr_hba_port at .remove. Fixes: 42fc9fee116f ("scsi: mpi3mr: Add helper functions to manage device's port") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230302234336.25456-4-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index a794cc8a1c0b..2ad03b993f05 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5078,6 +5078,7 @@ static void mpi3mr_remove(struct pci_dev *pdev) struct workqueue_struct *wq; unsigned long flags; struct mpi3mr_tgt_dev *tgtdev, *tgtdev_next; + struct mpi3mr_hba_port *port, *hba_port_next; if (!shost) return; @@ -5117,6 +5118,16 @@ static void mpi3mr_remove(struct pci_dev *pdev) mpi3mr_free_mem(mrioc); mpi3mr_cleanup_resources(mrioc); + spin_lock_irqsave(&mrioc->sas_node_lock, flags); + list_for_each_entry_safe(port, hba_port_next, &mrioc->hba_port_table_list, list) { + ioc_info(mrioc, + "removing hba_port entry: %p port: %d from hba_port list\n", + port, port->port_id); + list_del(&port->list); + kfree(port); + } + spin_unlock_irqrestore(&mrioc->sas_node_lock, flags); + spin_lock(&mrioc_list_lock); list_del(&mrioc->list); spin_unlock(&mrioc_list_lock); -- cgit From d4caa1a4255cc44be56bcab3db2c97c632e6cc10 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 3 Mar 2023 00:43:34 +0100 Subject: scsi: mpi3mr: Fix sas_hba.phy memory leak in mpi3mr_remove() Free mrioc->sas_hba.phy at .remove. Fixes: 42fc9fee116f ("scsi: mpi3mr: Add helper functions to manage device's port") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230302234336.25456-5-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 2ad03b993f05..2e546c80d98c 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5128,6 +5128,12 @@ static void mpi3mr_remove(struct pci_dev *pdev) } spin_unlock_irqrestore(&mrioc->sas_node_lock, flags); + if (mrioc->sas_hba.num_phys) { + kfree(mrioc->sas_hba.phy); + mrioc->sas_hba.phy = NULL; + mrioc->sas_hba.num_phys = 0; + } + spin_lock(&mrioc_list_lock); list_del(&mrioc->list); spin_unlock(&mrioc_list_lock); -- cgit From c798304470cab88723d895726d17fcb96472e0e9 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 3 Mar 2023 00:43:35 +0100 Subject: scsi: mpi3mr: Fix memory leaks in mpi3mr_init_ioc() Don't allocate memory again when IOC is being reinitialized. Fixes: fe6db6151565 ("scsi: mpi3mr: Handle offline FW activation in graceful manner") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230302234336.25456-6-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 0736e6a59e81..a565817aa56d 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -3837,29 +3837,34 @@ retry_init: mpi3mr_print_ioc_info(mrioc); - dprint_init(mrioc, "allocating config page buffers\n"); - mrioc->cfg_page = dma_alloc_coherent(&mrioc->pdev->dev, - MPI3MR_DEFAULT_CFG_PAGE_SZ, &mrioc->cfg_page_dma, GFP_KERNEL); if (!mrioc->cfg_page) { - retval = -1; - goto out_failed_noretry; + dprint_init(mrioc, "allocating config page buffers\n"); + mrioc->cfg_page_sz = MPI3MR_DEFAULT_CFG_PAGE_SZ; + mrioc->cfg_page = dma_alloc_coherent(&mrioc->pdev->dev, + mrioc->cfg_page_sz, &mrioc->cfg_page_dma, GFP_KERNEL); + if (!mrioc->cfg_page) { + retval = -1; + goto out_failed_noretry; + } } - mrioc->cfg_page_sz = MPI3MR_DEFAULT_CFG_PAGE_SZ; - - retval = mpi3mr_alloc_reply_sense_bufs(mrioc); - if (retval) { - ioc_err(mrioc, - "%s :Failed to allocated reply sense buffers %d\n", - __func__, retval); - goto out_failed_noretry; + if (!mrioc->init_cmds.reply) { + retval = mpi3mr_alloc_reply_sense_bufs(mrioc); + if (retval) { + ioc_err(mrioc, + "%s :Failed to allocated reply sense buffers %d\n", + __func__, retval); + goto out_failed_noretry; + } } - retval = mpi3mr_alloc_chain_bufs(mrioc); - if (retval) { - ioc_err(mrioc, "Failed to allocated chain buffers %d\n", - retval); - goto out_failed_noretry; + if (!mrioc->chain_sgl_list) { + retval = mpi3mr_alloc_chain_bufs(mrioc); + if (retval) { + ioc_err(mrioc, "Failed to allocated chain buffers %d\n", + retval); + goto out_failed_noretry; + } } retval = mpi3mr_issue_iocinit(mrioc); -- cgit From ce756daa36e1ba271bb3334267295e447aa57a5c Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 3 Mar 2023 00:43:36 +0100 Subject: scsi: mpi3mr: Fix expander node leak in mpi3mr_remove() Add a missing resource clean up in .remove. Fixes: e22bae30667a ("scsi: mpi3mr: Add expander devices to STL") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230302234336.25456-7-thenzl@redhat.com Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr.h | 2 ++ drivers/scsi/mpi3mr/mpi3mr_os.c | 7 +++++++ drivers/scsi/mpi3mr/mpi3mr_transport.c | 5 +---- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 40f238fa80cc..364fb1b5e45a 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -1393,4 +1393,6 @@ void mpi3mr_flush_drv_cmds(struct mpi3mr_ioc *mrioc); void mpi3mr_flush_cmds_for_unrecovered_controller(struct mpi3mr_ioc *mrioc); void mpi3mr_free_enclosure_list(struct mpi3mr_ioc *mrioc); int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc); +void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc, + struct mpi3mr_sas_node *sas_expander); #endif /*MPI3MR_H_INCLUDED*/ diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 2e546c80d98c..6d55698ea4d1 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5079,6 +5079,7 @@ static void mpi3mr_remove(struct pci_dev *pdev) unsigned long flags; struct mpi3mr_tgt_dev *tgtdev, *tgtdev_next; struct mpi3mr_hba_port *port, *hba_port_next; + struct mpi3mr_sas_node *sas_expander, *sas_expander_next; if (!shost) return; @@ -5119,6 +5120,12 @@ static void mpi3mr_remove(struct pci_dev *pdev) mpi3mr_cleanup_resources(mrioc); spin_lock_irqsave(&mrioc->sas_node_lock, flags); + list_for_each_entry_safe_reverse(sas_expander, sas_expander_next, + &mrioc->sas_expander_list, list) { + spin_unlock_irqrestore(&mrioc->sas_node_lock, flags); + mpi3mr_expander_node_remove(mrioc, sas_expander); + spin_lock_irqsave(&mrioc->sas_node_lock, flags); + } list_for_each_entry_safe(port, hba_port_next, &mrioc->hba_port_table_list, list) { ioc_info(mrioc, "removing hba_port entry: %p port: %d from hba_port list\n", diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index be25f242fa79..5748bd9369ff 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -9,9 +9,6 @@ #include "mpi3mr.h" -static void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc, - struct mpi3mr_sas_node *sas_expander); - /** * mpi3mr_post_transport_req - Issue transport requests and wait * @mrioc: Adapter instance reference @@ -2164,7 +2161,7 @@ out_fail: * * Return nothing. */ -static void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc, +void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc, struct mpi3mr_sas_node *sas_expander) { struct mpi3mr_sas_port *mr_sas_port, *next; -- cgit From bbdf904b13a62bb8b1272d92a7dde082dff86fbb Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 6 Mar 2023 15:41:01 +0800 Subject: ALSA: hda: intel-dsp-config: add MTL PCI id Use SOF as default audio driver. Signed-off-by: Bard Liao Reviewed-by: Gongjun Song Reviewed-by: Kai Vehmanen Cc: Link: https://lore.kernel.org/r/20230306074101.3906707-1-yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index ae31bb127594..317bdf6dcbef 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -472,6 +472,15 @@ static const struct config_entry config_table[] = { }, #endif +/* Meteor Lake */ +#if IS_ENABLED(CONFIG_SND_SOC_SOF_METEORLAKE) + /* Meteorlake-P */ + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x7e28, + }, +#endif + }; static const struct config_entry *snd_intel_dsp_find_config -- cgit From 7bb62340951a9af20235a3bde8c98e2e292915df Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Tue, 7 Mar 2023 21:53:16 +0800 Subject: ALSA: hda/realtek: fix speaker, mute/micmute LEDs not work on a HP platform There is a HP platform needs ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED quirk to make mic-mute/audio-mute/speaker working. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20230307135317.37621-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3c629f4ae080..5d530b489c48 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9447,6 +9447,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8f, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b92, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), -- cgit From a86e79e3015f5dd8e1b01ccfa49bd5c6e41047a1 Mon Sep 17 00:00:00 2001 From: "Hamidreza H. Fard" Date: Tue, 7 Mar 2023 16:37:41 +0000 Subject: ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book2 Pro Samsung Galaxy Book2 Pro (13" 2022 NP930XED-KA1DE) with codec SSID 144d:c868 requires the same workaround for enabling the speaker amp like other Samsung models with ALC298 code. Signed-off-by: Hamidreza H. Fard Cc: Link: https://lore.kernel.org/r/20230307163741.3878-1-nitocris@posteo.net Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5d530b489c48..f09a1d7c1b18 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9540,6 +9540,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), -- cgit From ff447886e675979d66b2bc01810035d3baea1b3a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 15:40:54 -0600 Subject: ALSA: hda: Match only Intel devices with CONTROLLER_IN_GPU() CONTROLLER_IN_GPU() is clearly intended to match only Intel devices, but previously it checked only the PCI Device ID, not the Vendor ID, so it could match devices from other vendors that happened to use the same Device ID. Update CONTROLLER_IN_GPU() so it matches only Intel devices. Fixes: 535115b5ff51 ("ALSA: hda - Abort the probe without i915 binding for HSW/B") Signed-off-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230307214054.886721-1-helgaas@kernel.org Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 81c4a45254ff..77a592f21947 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -328,14 +328,15 @@ enum { #define needs_eld_notify_link(chip) false #endif -#define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \ +#define CONTROLLER_IN_GPU(pci) (((pci)->vendor == 0x8086) && \ + (((pci)->device == 0x0a0c) || \ ((pci)->device == 0x0c0c) || \ ((pci)->device == 0x0d0c) || \ ((pci)->device == 0x160c) || \ ((pci)->device == 0x490d) || \ ((pci)->device == 0x4f90) || \ ((pci)->device == 0x4f91) || \ - ((pci)->device == 0x4f92)) + ((pci)->device == 0x4f92))) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) -- cgit From 068d82e75d537b444303b8c449a11e51ea659565 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:22:56 +0000 Subject: netfilter: nft_nat: correct length for loading protocol registers The values in the protocol registers are two bytes wide. However, when parsing the register loads, the code currently uses the larger 16-byte size of a `union nf_inet_addr`. Change it to use the (correct) size of a `union nf_conntrack_man_proto` instead. Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()") Signed-off-by: Jeremy Sowden Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 047999150390..5c29915ab028 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->flags |= NF_NAT_RANGE_MAP_IPS; } - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], &priv->sreg_proto_min, plen); -- cgit From ec2c5917eb858428b2083d1c74f445aabbe8316b Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:22:57 +0000 Subject: netfilter: nft_masq: correct length for loading protocol registers The values in the protocol registers are two bytes wide. However, when parsing the register loads, the code currently uses the larger 16-byte size of a `union nf_inet_addr`. Change it to use the (correct) size of a `union nf_conntrack_man_proto` instead. Fixes: 8a6bf5da1aef ("netfilter: nft_masq: support port range") Signed-off-by: Jeremy Sowden Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_masq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index e55e455275c4..9544c2f16998 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - u32 plen = sizeof_field(struct nf_nat_range, min_addr.all); + u32 plen = sizeof_field(struct nf_nat_range, min_proto.all); struct nft_masq *priv = nft_expr_priv(expr); int err; -- cgit From 1f617b6b4c7a3d5ea7a56abb83a4c27733b60c2f Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:22:58 +0000 Subject: netfilter: nft_redir: correct length for loading protocol registers The values in the protocol registers are two bytes wide. However, when parsing the register loads, the code currently uses the larger 16-byte size of a `union nf_inet_addr`. Change it to use the (correct) size of a `union nf_conntrack_man_proto` instead. Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()") Signed-off-by: Jeremy Sowden Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_redir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 5f7739987559..dbc642f5d32a 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx, unsigned int plen; int err; - plen = sizeof_field(struct nf_nat_range, min_addr.all); + plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); -- cgit From 493924519b1fe3faab13ee621a43b0d0939abab1 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:22:59 +0000 Subject: netfilter: nft_redir: correct value of inet type `.maxattrs` `nft_redir_inet_type.maxattrs` was being set, presumably because of a cut-and-paste error, to `NFTA_MASQ_MAX`, instead of `NFTA_REDIR_MAX`. Fixes: 63ce3940f3ab ("netfilter: nft_redir: add inet support") Signed-off-by: Jeremy Sowden Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_redir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index dbc642f5d32a..67cec56bc84a 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -236,7 +236,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = { .name = "redir", .ops = &nft_redir_inet_ops, .policy = nft_redir_policy, - .maxattr = NFTA_MASQ_MAX, + .maxattr = NFTA_REDIR_MAX, .owner = THIS_MODULE, }; -- cgit From af0f46e5b9a462aaa1d76e82781a5316f03828eb Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 7 Mar 2023 07:51:11 -0800 Subject: ASoC: da7219: Initialize jack_det_mutex The following traceback is reported if mutex debugging is enabled. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 17 at kernel/locking/mutex.c:950 __mutex_lock_common+0x31c/0x11d4 Modules linked in: CPU: 0 PID: 17 Comm: kworker/0:1 Not tainted 5.10.172-lockdep-21846-g849884cfca5a #1 fd2de466502012eb58bc8beb467f07d0b925611f Hardware name: MediaTek kakadu rev0/rev1 board (DT) Workqueue: events da7219_aad_jack_det_work pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--) pc : __mutex_lock_common+0x31c/0x11d4 lr : __mutex_lock_common+0x31c/0x11d4 sp : ffffff80c0317ae0 x29: ffffff80c0317b50 x28: ffffff80c0317b20 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000000 x24: 0000000100000000 x23: ffffffd0121d296c x22: dfffffd000000000 x21: 0000000000000000 x20: 0000000000000000 x19: ffffff80c73d7190 x18: 1ffffff018050f52 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001 x11: 0000000000000000 x10: 0000000000000000 x9 : 83f0d991da544b00 x8 : 83f0d991da544b00 x7 : 0000000000000000 x6 : 0000000000000001 x5 : ffffff80c03176a0 x4 : 0000000000000000 x3 : ffffffd01067fd78 x2 : 0000000100000000 x1 : ffffff80c030ba80 x0 : 0000000000000028 Call trace: __mutex_lock_common+0x31c/0x11d4 mutex_lock_nested+0x98/0xac da7219_aad_jack_det_work+0x54/0xf0 process_one_work+0x6cc/0x19dc worker_thread+0x458/0xddc kthread+0x2fc/0x370 ret_from_fork+0x10/0x30 irq event stamp: 579 hardirqs last enabled at (579): [] exit_to_kernel_mode+0x108/0x138 hardirqs last disabled at (577): [] __do_softirq+0x53c/0x125c softirqs last enabled at (578): [] __irq_exit_rcu+0x264/0x4f4 softirqs last disabled at (573): [] __irq_exit_rcu+0x264/0x4f4 ---[ end trace 26da674636181c40 ]--- Initialize the mutex to fix the problem. Cc: David Rau Fixes: 7fde88eda855 ("ASoC: da7219: Improve the IRQ process to increase the stability") Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20230307155111.1985522-1-linux@roeck-us.net Signed-off-by: Mark Brown --- sound/soc/codecs/da7219-aad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 4a4f09f924bc..e3d398b8f54e 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -968,6 +968,8 @@ int da7219_aad_init(struct snd_soc_component *component) INIT_WORK(&da7219_aad->hptest_work, da7219_aad_hptest_work); INIT_WORK(&da7219_aad->jack_det_work, da7219_aad_jack_det_work); + mutex_init(&da7219_aad->jack_det_mutex); + ret = request_threaded_irq(da7219_aad->irq, da7219_aad_pre_irq_thread, da7219_aad_irq_thread, IRQF_TRIGGER_LOW | IRQF_ONESHOT, -- cgit From 2ab4f4018cb6b8010ca5002c3bdc37783b5d28c2 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 7 Mar 2023 16:23:24 +0000 Subject: firmware: arm_scmi: Fix device node validation for mailbox transport When mailboxes are used as a transport it is possible to setup the SCMI transport layer, depending on the underlying channels configuration, to use one or two mailboxes, associated, respectively, to one or two, distinct, shared memory areas: any other combination should be treated as invalid. Add more strict checking of SCMI mailbox transport device node descriptors. Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Cc: # 4.19 Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20230307162324.891866-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/mailbox.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 0d9c9538b7f4..112c285deb97 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -52,6 +52,39 @@ static bool mailbox_chan_available(struct device_node *of_node, int idx) "#mbox-cells", idx, NULL); } +static int mailbox_chan_validate(struct device *cdev) +{ + int num_mb, num_sh, ret = 0; + struct device_node *np = cdev->of_node; + + num_mb = of_count_phandle_with_args(np, "mboxes", "#mbox-cells"); + num_sh = of_count_phandle_with_args(np, "shmem", NULL); + /* Bail out if mboxes and shmem descriptors are inconsistent */ + if (num_mb <= 0 || num_sh > 2 || num_mb != num_sh) { + dev_warn(cdev, "Invalid channel descriptor for '%s'\n", + of_node_full_name(np)); + return -EINVAL; + } + + if (num_sh > 1) { + struct device_node *np_tx, *np_rx; + + np_tx = of_parse_phandle(np, "shmem", 0); + np_rx = of_parse_phandle(np, "shmem", 1); + /* SCMI Tx and Rx shared mem areas have to be distinct */ + if (!np_tx || !np_rx || np_tx == np_rx) { + dev_warn(cdev, "Invalid shmem descriptor for '%s'\n", + of_node_full_name(np)); + ret = -EINVAL; + } + + of_node_put(np_tx); + of_node_put(np_rx); + } + + return ret; +} + static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, bool tx) { @@ -64,6 +97,10 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, resource_size_t size; struct resource res; + ret = mailbox_chan_validate(cdev); + if (ret) + return ret; + smbox = devm_kzalloc(dev, sizeof(*smbox), GFP_KERNEL); if (!smbox) return -ENOMEM; -- cgit From d617808e3b8324eacebabefec49dc75536ee39cc Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 8 Jul 2022 21:30:01 +0200 Subject: firmware: arm_scmi: Use the bitmap API to allocate bitmaps Use devm_bitmap_zalloc() instead of hand-writing them. It is less verbose and it improves the semantic. Signed-off-by: Christophe JAILLET Reviewed-by: Cristian Marussi Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/c073b1607ada34d5bde6ce1009179cf15bbf0da3.1657308593.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 15a431639d82..dbc474ff62b7 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2221,8 +2221,8 @@ static int __scmi_xfer_info_init(struct scmi_info *sinfo, hash_init(info->pending_xfers); /* Allocate a bitmask sized to hold MSG_TOKEN_MAX tokens */ - info->xfer_alloc_table = devm_kcalloc(dev, BITS_TO_LONGS(MSG_TOKEN_MAX), - sizeof(long), GFP_KERNEL); + info->xfer_alloc_table = devm_bitmap_zalloc(dev, MSG_TOKEN_MAX, + GFP_KERNEL); if (!info->xfer_alloc_table) return -ENOMEM; -- cgit From f87fb985452ab2083967103ac00bfd68fb182764 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Mar 2023 16:42:42 +0100 Subject: usb: ucsi: Fix NULL pointer deref in ucsi_connector_change() When ucsi_init() fails, ucsi->connector is NULL, yet in case of ucsi_acpi we may still get events which cause the ucs_acpi code to call ucsi_connector_change(), which then derefs the NULL ucsi->connector pointer. Fix this by not setting ucsi->ntfy inside ucsi_init() until ucsi_init() has succeeded, so that ucsi_connector_change() ignores the events because UCSI_ENABLE_NTFY_CONNECTOR_CHANGE is not set in the ntfy mask. Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217106 Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230308154244.722337-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index f632350f6dcb..0623861c597b 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1329,7 +1329,7 @@ out_unlock: static int ucsi_init(struct ucsi *ucsi) { struct ucsi_connector *con; - u64 command; + u64 command, ntfy; int ret; int i; @@ -1341,8 +1341,8 @@ static int ucsi_init(struct ucsi *ucsi) } /* Enable basic notifications */ - ucsi->ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR; - command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy; + ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR; + command = UCSI_SET_NOTIFICATION_ENABLE | ntfy; ret = ucsi_send_command(ucsi, command, NULL, 0); if (ret < 0) goto err_reset; @@ -1374,12 +1374,13 @@ static int ucsi_init(struct ucsi *ucsi) } /* Enable all notifications */ - ucsi->ntfy = UCSI_ENABLE_NTFY_ALL; - command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy; + ntfy = UCSI_ENABLE_NTFY_ALL; + command = UCSI_SET_NOTIFICATION_ENABLE | ntfy; ret = ucsi_send_command(ucsi, command, NULL, 0); if (ret < 0) goto err_unregister; + ucsi->ntfy = ntfy; return 0; err_unregister: -- cgit From 0482c34ec6f8557e06cd0f8e2d0e20e8ede6a22c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Mar 2023 16:42:43 +0100 Subject: usb: ucsi: Fix ucsi->connector race ucsi_init() which runs from a workqueue sets ucsi->connector and on an error will clear it again. ucsi->connector gets dereferenced by ucsi_resume(), this checks for ucsi->connector being NULL in case ucsi_init() has not finished yet; or in case ucsi_init() has failed. ucsi_init() setting ucsi->connector and then clearing it again on an error creates a race where the check in ucsi_resume() may pass, only to have ucsi->connector free-ed underneath it when ucsi_init() hits an error. Fix this race by making ucsi_init() store the connector array in a local variable and only assign it to ucsi->connector on success. Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230308154244.722337-3-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0623861c597b..8d1baf28df55 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1125,12 +1125,11 @@ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con) return NULL; } -static int ucsi_register_port(struct ucsi *ucsi, int index) +static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con) { struct usb_power_delivery_desc desc = { ucsi->cap.pd_version}; struct usb_power_delivery_capabilities_desc pd_caps; struct usb_power_delivery_capabilities *pd_cap; - struct ucsi_connector *con = &ucsi->connector[index]; struct typec_capability *cap = &con->typec_cap; enum typec_accessory *accessory = cap->accessory; enum usb_role u_role = USB_ROLE_NONE; @@ -1151,7 +1150,6 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) init_completion(&con->complete); mutex_init(&con->lock); INIT_LIST_HEAD(&con->partner_tasks); - con->num = index + 1; con->ucsi = ucsi; cap->fwnode = ucsi_find_fwnode(con); @@ -1328,7 +1326,7 @@ out_unlock: */ static int ucsi_init(struct ucsi *ucsi) { - struct ucsi_connector *con; + struct ucsi_connector *con, *connector; u64 command, ntfy; int ret; int i; @@ -1359,16 +1357,16 @@ static int ucsi_init(struct ucsi *ucsi) } /* Allocate the connectors. Released in ucsi_unregister() */ - ucsi->connector = kcalloc(ucsi->cap.num_connectors + 1, - sizeof(*ucsi->connector), GFP_KERNEL); - if (!ucsi->connector) { + connector = kcalloc(ucsi->cap.num_connectors + 1, sizeof(*connector), GFP_KERNEL); + if (!connector) { ret = -ENOMEM; goto err_reset; } /* Register all connectors */ for (i = 0; i < ucsi->cap.num_connectors; i++) { - ret = ucsi_register_port(ucsi, i); + connector[i].num = i + 1; + ret = ucsi_register_port(ucsi, &connector[i]); if (ret) goto err_unregister; } @@ -1380,11 +1378,12 @@ static int ucsi_init(struct ucsi *ucsi) if (ret < 0) goto err_unregister; + ucsi->connector = connector; ucsi->ntfy = ntfy; return 0; err_unregister: - for (con = ucsi->connector; con->port; con++) { + for (con = connector; con->port; con++) { ucsi_unregister_partner(con); ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON); ucsi_unregister_port_psy(con); @@ -1400,10 +1399,7 @@ err_unregister: typec_unregister_port(con->port); con->port = NULL; } - - kfree(ucsi->connector); - ucsi->connector = NULL; - + kfree(connector); err_reset: memset(&ucsi->cap, 0, sizeof(ucsi->cap)); ucsi_reset_ppm(ucsi); -- cgit From 02d210f434249a7edbc160969b75df030dc6934d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 8 Mar 2023 16:42:44 +0100 Subject: usb: ucsi_acpi: Increase the command completion timeout Commit 130a96d698d7 ("usb: typec: ucsi: acpi: Increase command completion timeout value") increased the timeout from 5 seconds to 60 seconds due to issues related to alternate mode discovery. After the alternate mode discovery switch to polled mode the timeout was reduced, but instead of being set back to 5 seconds it was reduced to 1 second. This is causing problems when using a Lenovo ThinkPad X1 yoga gen7 connected over Type-C to a LG 27UL850-W (charging DP over Type-C). When the monitor is already connected at boot the following error is logged: "PPM init failed (-110)", /sys/class/typec is empty and on unplugging the NULL pointer deref fixed earlier in this series happens. When the monitor is connected after boot the following error is logged instead: "GET_CONNECTOR_STATUS failed (-110)". Setting the timeout back to 5 seconds fixes both cases. Fixes: e08065069fc7 ("usb: typec: ucsi: acpi: Reduce the command completion timeout") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230308154244.722337-4-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index ce0c8ef80c04..62206a6b8ea7 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -78,7 +78,7 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, if (ret) goto out_clear_bit; - if (!wait_for_completion_timeout(&ua->complete, HZ)) + if (!wait_for_completion_timeout(&ua->complete, 5 * HZ)) ret = -ETIMEDOUT; out_clear_bit: -- cgit From 02c1820345e795148e6b497ef85090915401698e Mon Sep 17 00:00:00 2001 From: Vincenzo Palazzo Date: Thu, 2 Mar 2023 16:07:06 +0100 Subject: usb: dwc3: Fix a typo in field name Fix a typo inside the dwc3 struct docs. Fixes: 63d7f9810a38 ("usb: dwc3: core: Enable GUCTL1 bit 10 for fixing termination error after resume bug") Signed-off-by: Vincenzo Palazzo Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20230302150706.229008-1-vincenzopalazzodev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 582ebd9cf9c2..4743e918dcaf 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1098,7 +1098,7 @@ struct dwc3_scratchpad_array { * change quirk. * @dis_tx_ipgap_linecheck_quirk: set if we disable u2mac linestate * check during HS transmit. - * @resume-hs-terminations: Set if we enable quirk for fixing improper crc + * @resume_hs_terminations: Set if we enable quirk for fixing improper crc * generation after resume from suspend. * @parkmode_disable_ss_quirk: set if we need to disable all SuperSpeed * instances in park mode. -- cgit From e041a2a550582106cba6a7c862c90dfc2ad14492 Mon Sep 17 00:00:00 2001 From: Emil Abildgaard Svendsen Date: Thu, 9 Mar 2023 06:54:41 +0000 Subject: ASoC: hdmi-codec: only startup/shutdown on supported streams Currently only one stream is supported. This isn't usally a problem until you have a multi codec audio card. Because the audio card will run startup and shutdown on both capture and playback streams. So if your hdmi-codec only support either playback or capture. Then ALSA can't open for playback and capture. This patch will ignore if startup and shutdown are called with a non supported stream. Thus, allowing an audio card like this: +-+ cpu1 <--@-| |-> codec1 (HDMI-CODEC) | |<- codec2 (NOT HDMI-CODEC) +-+ Signed-off-by: Emil Svendsen Link: https://lore.kernel.org/r/20230309065432.4150700-2-emas@bang-olufsen.dk Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 01e8ffda2a4b..6d980fbc4207 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -428,8 +428,13 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream, { struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; + bool has_capture = !hcp->hcd.no_i2s_capture; + bool has_playback = !hcp->hcd.no_i2s_playback; int ret = 0; + if (!((has_playback && tx) || (has_capture && !tx))) + return 0; + mutex_lock(&hcp->lock); if (hcp->busy) { dev_err(dai->dev, "Only one simultaneous stream supported!\n"); @@ -468,6 +473,12 @@ static void hdmi_codec_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai); + bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; + bool has_capture = !hcp->hcd.no_i2s_capture; + bool has_playback = !hcp->hcd.no_i2s_playback; + + if (!((has_playback && tx) || (has_capture && !tx))) + return; hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN; hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data); -- cgit From d8a2bb4eb75866275b5cf7de2e593ac3449643e2 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Mon, 6 Mar 2023 12:05:57 -0800 Subject: usb: dwc3: gadget: Add 1ms delay after end transfer command without IOC Previously, there was a 100uS delay inserted after issuing an end transfer command for specific controller revisions. This was due to the fact that there was a GUCTL2 bit field which enabled synchronous completion of the end transfer command once the CMDACT bit was cleared in the DEPCMD register. Since this bit does not exist for all controller revisions and the current implementation heavily relies on utizling the EndTransfer command completion interrupt, add the delay back in for uses where the interrupt on completion bit is not set, and increase the duration to 1ms for the controller to complete the command. An issue was seen where the USB request buffer was unmapped while the DWC3 controller was still accessing the TRB. However, it was confirmed that the end transfer command was successfully submitted. (no end transfer timeout) In situations, such as dwc3_gadget_soft_disconnect() and __dwc3_gadget_ep_disable(), the dwc3_remove_request() is utilized, which will issue the end transfer command, and follow up with dwc3_gadget_giveback(). At least for the USB ep disable path, it is required for any pending and started requests to be completed and returned to the function driver in the same context of the disable call. Without the GUCTL2 bit, it is not ensured that the end transfer is completed before the buffers are unmapped. Fixes: cf2f8b63f7f1 ("usb: dwc3: gadget: Remove END_TRANSFER delay") Cc: stable Signed-off-by: Wesley Cheng Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20230306200557.29387-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 3c63fa97a680..cf5b4f49c3ed 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1699,6 +1699,7 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc) */ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { + struct dwc3 *dwc = dep->dwc; struct dwc3_gadget_ep_cmd_params params; u32 cmd; int ret; @@ -1722,10 +1723,13 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int WARN_ON_ONCE(ret); dep->resource_index = 0; - if (!interrupt) + if (!interrupt) { + if (!DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC3, 310A)) + mdelay(1); dep->flags &= ~DWC3_EP_TRANSFER_STARTED; - else if (!ret) + } else if (!ret) { dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + } dep->flags &= ~DWC3_EP_DELAY_STOP; return ret; @@ -3774,7 +3778,11 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, * enabled, the EndTransfer command will have completed upon * returning from this function. * - * This mode is NOT available on the DWC_usb31 IP. + * This mode is NOT available on the DWC_usb31 IP. In this + * case, if the IOC bit is not set, then delay by 1ms + * after issuing the EndTransfer command. This allows for the + * controller to handle the command completely before DWC3 + * remove requests attempts to unmap USB request buffers. */ __dwc3_stop_active_transfer(dep, force, interrupt); -- cgit From f7c13cb48e85538709850589b496c4ddb3d3898e Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 23 Feb 2023 08:39:20 +0100 Subject: usb: misc: onboard-hub: add support for Microchip USB2517 USB 2.0 hub Add support for Microchip USB2517 USB 2.0 hub to the onboard usb hub driver. Adopt the generic usb-device compatible ("usbVID,PID"). This hub has the same reset timings as USB2514, so reuse that one. There is also an USB2517I which just has industrial temperature range. Signed-off-by: Alexander Stein Cc: stable Acked-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20230223073920.2912298-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 1 + drivers/usb/misc/onboard_usb_hub.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index 5402e4b7267b..12fc6eb67c3b 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -410,6 +410,7 @@ static const struct usb_device_id onboard_hub_id_table[] = { { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 */ { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */ + { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */ { USB_DEVICE(VENDOR_ID_REALTEK, 0x0414) }, /* RTS5414 USB 3.2 */ diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h index 0a943a154649..aca5f50eb0da 100644 --- a/drivers/usb/misc/onboard_usb_hub.h +++ b/drivers/usb/misc/onboard_usb_hub.h @@ -36,6 +36,7 @@ static const struct onboard_hub_pdata vialab_vl817_data = { static const struct of_device_id onboard_hub_match[] = { { .compatible = "usb424,2514", .data = µchip_usb424_data, }, + { .compatible = "usb424,2517", .data = µchip_usb424_data, }, { .compatible = "usb451,8140", .data = &ti_tusb8041_data, }, { .compatible = "usb451,8142", .data = &ti_tusb8041_data, }, { .compatible = "usb5e3,608", .data = &genesys_gl850g_data, }, -- cgit From 6c67ed9ad9b83e453e808f9b31a931a20a25629b Mon Sep 17 00:00:00 2001 From: Alvin Šipraga Date: Thu, 2 Mar 2023 17:36:47 +0100 Subject: usb: gadget: u_audio: don't let userspace block driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the unbind callback for f_uac1 and f_uac2, a call to snd_card_free() via g_audio_cleanup() will disconnect the card and then wait for all resources to be released, which happens when the refcount falls to zero. Since userspace can keep the refcount incremented by not closing the relevant file descriptor, the call to unbind may block indefinitely. This can cause a deadlock during reboot, as evidenced by the following blocked task observed on my machine: task:reboot state:D stack:0 pid:2827 ppid:569 flags:0x0000000c Call trace: __switch_to+0xc8/0x140 __schedule+0x2f0/0x7c0 schedule+0x60/0xd0 schedule_timeout+0x180/0x1d4 wait_for_completion+0x78/0x180 snd_card_free+0x90/0xa0 g_audio_cleanup+0x2c/0x64 afunc_unbind+0x28/0x60 ... kernel_restart+0x4c/0xac __do_sys_reboot+0xcc/0x1ec __arm64_sys_reboot+0x28/0x30 invoke_syscall+0x4c/0x110 ... The issue can also be observed by opening the card with arecord and then stopping the process through the shell before unbinding: # arecord -D hw:UAC2Gadget -f S32_LE -c 2 -r 48000 /dev/null Recording WAVE '/dev/null' : Signed 32 bit Little Endian, Rate 48000 Hz, Stereo ^Z[1]+ Stopped arecord -D hw:UAC2Gadget -f S32_LE -c 2 -r 48000 /dev/null # echo gadget.0 > /sys/bus/gadget/drivers/configfs-gadget/unbind (observe that the unbind command never finishes) Fix the problem by using snd_card_free_when_closed() instead, which will still disconnect the card as desired, but defer the task of freeing the resources to the core once userspace closes its file descriptor. Fixes: 132fcb460839 ("usb: gadget: Add Audio Class 2.0 Driver") Cc: stable@vger.kernel.org Signed-off-by: Alvin Šipraga Reviewed-by: Ruslan Bilovol Reviewed-by: John Keeping Link: https://lore.kernel.org/r/20230302163648.3349669-1-alvin@pqrs.dk Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index c1f62e91b012..4a42574b4a7f 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -1422,7 +1422,7 @@ void g_audio_cleanup(struct g_audio *g_audio) uac = g_audio->uac; card = uac->card; if (card) - snd_card_free(card); + snd_card_free_when_closed(card); kfree(uac->p_prm.reqs); kfree(uac->c_prm.reqs); -- cgit From a826492fc9dfe32afd70fff93955ae8174bbf14b Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 15 Feb 2023 13:49:51 +0800 Subject: usb: typec: tcpm: fix create duplicate source-capabilities file The kernel will dump in the below cases: sysfs: cannot create duplicate filename '/devices/virtual/usb_power_delivery/pd1/source-capabilities' 1. After soft reset has completed, an Explicit Contract negotiation occurs. The sink device will receive source capabilitys again. This will cause a duplicate source-capabilities file be created. 2. Power swap twice on a device that is initailly sink role. This will unregister existing capabilities when above cases occurs. Fixes: 8203d26905ee ("usb: typec: tcpm: Register USB Power Delivery Capabilities") cc: Signed-off-by: Xu Yang Reviewed-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230215054951.238394-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a0d943d78580..7f39cb9b3429 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4570,6 +4570,9 @@ static void run_state_machine(struct tcpm_port *port) case SOFT_RESET: port->message_id = 0; port->rx_msgid = -1; + /* remove existing capabilities */ + usb_power_delivery_unregister_capabilities(port->partner_source_caps); + port->partner_source_caps = NULL; tcpm_pd_send_control(port, PD_CTRL_ACCEPT); tcpm_ams_finish(port); if (port->pwr_role == TYPEC_SOURCE) { @@ -4589,6 +4592,9 @@ static void run_state_machine(struct tcpm_port *port) case SOFT_RESET_SEND: port->message_id = 0; port->rx_msgid = -1; + /* remove existing capabilities */ + usb_power_delivery_unregister_capabilities(port->partner_source_caps); + port->partner_source_caps = NULL; if (tcpm_pd_send_control(port, PD_CTRL_SOFT_RESET)) tcpm_set_state_cond(port, hard_reset_state(port), 0); else @@ -4718,6 +4724,9 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, SNK_STARTUP, 0); break; case PR_SWAP_SNK_SRC_SINK_OFF: + /* will be source, remove existing capabilities */ + usb_power_delivery_unregister_capabilities(port->partner_source_caps); + port->partner_source_caps = NULL; /* * Prevent vbus discharge circuit from turning on during PR_SWAP * as this is not a disconnect. -- cgit From abfc4fa28f0160df61c7149567da4f6494dfb488 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 16 Feb 2023 11:15:15 +0800 Subject: usb: typec: tcpm: fix warning when handle discover_identity message Since both source and sink device can send discover_identity message in PD3, kernel may dump below warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 169 at drivers/usb/typec/tcpm/tcpm.c:1446 tcpm_queue_vdm+0xe0/0xf0 Modules linked in: CPU: 0 PID: 169 Comm: 1-0050 Not tainted 6.1.1-00038-g6a3c36cf1da2-dirty #567 Hardware name: NXP i.MX8MPlus EVK board (DT) pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : tcpm_queue_vdm+0xe0/0xf0 lr : tcpm_queue_vdm+0x2c/0xf0 sp : ffff80000c19bcd0 x29: ffff80000c19bcd0 x28: 0000000000000001 x27: ffff0000d11c8ab8 x26: ffff0000d11cc000 x25: 0000000000000000 x24: 00000000ff008081 x23: 0000000000000001 x22: 00000000ff00a081 x21: ffff80000c19bdbc x20: 0000000000000000 x19: ffff0000d11c8080 x18: ffffffffffffffff x17: 0000000000000000 x16: 0000000000000000 x15: ffff0000d716f580 x14: 0000000000000001 x13: ffff0000d716f507 x12: 0000000000000001 x11: 0000000000000000 x10: 0000000000000020 x9 : 00000000000ee098 x8 : 00000000ffffffff x7 : 000000000000001c x6 : ffff0000d716f580 x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 x2 : ffff80000c19bdbc x1 : 00000000ff00a081 x0 : 0000000000000004 Call trace: tcpm_queue_vdm+0xe0/0xf0 tcpm_pd_rx_handler+0x340/0x1ab0 kthread_worker_fn+0xcc/0x18c kthread+0x10c/0x110 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- Below sequences may trigger this warning: tcpm_send_discover_work(work) tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0); tcpm_queue_vdm(port, header, data, count); port->vdm_state = VDM_STATE_READY; vdm_state_machine_work(work); <-- received discover_identity from partner vdm_run_state_machine(port); port->vdm_state = VDM_STATE_SEND_MESSAGE; mod_vdm_delayed_work(port, x); tcpm_pd_rx_handler(work); tcpm_pd_data_request(port, msg); tcpm_handle_vdm_request(port, msg->payload, cnt); tcpm_queue_vdm(port, response[0], &response[1], rlen - 1); --> WARN_ON(port->vdm_state > VDM_STATE_DONE); For this case, the state machine could still send out discover identity message later if we skip current discover_identity message. So we should handle the received message firstly and override the pending discover_identity message without warning in this case. Then, a delayed send_discover work will send discover_identity message again. Fixes: e00943e91678 ("usb: typec: tcpm: PD3.0 sinks can send Discover Identity even in device mode") cc: Signed-off-by: Xu Yang Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230216031515.4151117-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 7f39cb9b3429..1ee774c263f0 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1445,10 +1445,18 @@ static int tcpm_ams_start(struct tcpm_port *port, enum tcpm_ams ams) static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header, const u32 *data, int cnt) { + u32 vdo_hdr = port->vdo_data[0]; + WARN_ON(!mutex_is_locked(&port->lock)); - /* Make sure we are not still processing a previous VDM packet */ - WARN_ON(port->vdm_state > VDM_STATE_DONE); + /* If is sending discover_identity, handle received message first */ + if (PD_VDO_SVDM(vdo_hdr) && PD_VDO_CMD(vdo_hdr) == CMD_DISCOVER_IDENT) { + port->send_discover = true; + mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS); + } else { + /* Make sure we are not still processing a previous VDM packet */ + WARN_ON(port->vdm_state > VDM_STATE_DONE); + } port->vdo_count = cnt + 1; port->vdo_data[0] = header; @@ -1948,11 +1956,13 @@ static void vdm_run_state_machine(struct tcpm_port *port) switch (PD_VDO_CMD(vdo_hdr)) { case CMD_DISCOVER_IDENT: res = tcpm_ams_start(port, DISCOVER_IDENTITY); - if (res == 0) + if (res == 0) { port->send_discover = false; - else if (res == -EAGAIN) + } else if (res == -EAGAIN) { + port->vdo_data[0] = 0; mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS); + } break; case CMD_DISCOVER_SVID: res = tcpm_ams_start(port, DISCOVER_SVIDS); @@ -2035,6 +2045,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) unsigned long timeout; port->vdm_retries = 0; + port->vdo_data[0] = 0; port->vdm_state = VDM_STATE_BUSY; timeout = vdm_ready_timeout(vdo_hdr); mod_vdm_delayed_work(port, timeout); -- cgit From 094f391013ba9cc77b4b1ae1617f0a832e598d67 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Wed, 8 Mar 2023 16:52:13 +0000 Subject: docs: usb: Add documentation for the UVC Gadget The UVC Gadget function has become quite complex, but documentation for it is fairly sparse. Add some more detailed documentation to improve the situation. Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20230308165213.139315-1-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget_uvc.rst | 352 +++++++++++++++++++++++++++++++++++++++ Documentation/usb/index.rst | 1 + 2 files changed, 353 insertions(+) create mode 100644 Documentation/usb/gadget_uvc.rst diff --git a/Documentation/usb/gadget_uvc.rst b/Documentation/usb/gadget_uvc.rst new file mode 100644 index 000000000000..6d22faceb1a0 --- /dev/null +++ b/Documentation/usb/gadget_uvc.rst @@ -0,0 +1,352 @@ +======================= +Linux UVC Gadget Driver +======================= + +Overview +-------- +The UVC Gadget driver is a driver for hardware on the *device* side of a USB +connection. It is intended to run on a Linux system that has USB device-side +hardware such as boards with an OTG port. + +On the device system, once the driver is bound it appears as a V4L2 device with +the output capability. + +On the host side (once connected via USB cable), a device running the UVC Gadget +driver *and controlled by an appropriate userspace program* should appear as a UVC +specification compliant camera, and function appropriately with any program +designed to handle them. The userspace program running on the device system can +queue image buffers from a variety of sources to be transmitted via the USB +connection. Typically this would mean forwarding the buffers from a camera sensor +peripheral, but the source of the buffer is entirely dependent on the userspace +companion program. + +Configuring the device kernel +----------------------------- +The Kconfig options USB_CONFIGFS, USB_LIBCOMPOSITE, USB_CONFIGFS_F_UVC and +USB_F_UVC must be selected to enable support for the UVC gadget. + +Configuring the gadget through configfs +--------------------------------------- +The UVC Gadget expects to be configured through configfs using the UVC function. +This allows a significant degree of flexibility, as many of a UVC device's +settings can be controlled this way. + +Not all of the available attributes are described here. For a complete enumeration +see Documentation/ABI/testing/configfs-usb-gadget-uvc + +Assumptions +~~~~~~~~~~~ +This section assumes that you have mounted configfs at `/sys/kernel/config` and +created a gadget as `/sys/kernel/config/usb_gadget/g1`. + +The UVC Function +~~~~~~~~~~~~~~~~ + +The first step is to create the UVC function: + +.. code-block:: bash + + # These variables will be assumed throughout the rest of the document + CONFIGFS="/sys/kernel/config" + GADGET="$CONFIGFS/usb_gadget/g1" + FUNCTION="$GADGET/functions/uvc.0" + + mkdir -p $FUNCTION + +Formats and Frames +~~~~~~~~~~~~~~~~~~ + +You must configure the gadget by telling it which formats you support, as well +as the frame sizes and frame intervals that are supported for each format. In +the current implementation there is no way for the gadget to refuse to set a +format that the host instructs it to set, so it is important that this step is +completed *accurately* to ensure that the host never asks for a format that +can't be provided. + +Formats are created under the streaming/uncompressed and streaming/mjpeg configfs +groups, with the framesizes created under the formats in the following +structure: + +:: + + uvc.0 + + | + + streaming + + | + + mjpeg + + | | + | + mjpeg + + | | + | + 720p + | | + | + 1080p + | + + uncompressed + + | + + yuyv + + | + + 720p + | + + 1080p + +Each frame can then be configured with a width and height, plus the maximum +buffer size required to store a single frame, and finally with the supported +frame intervals for that format and framesize. Width and height are enumerated in +units of pixels, frame interval in units of 100ns. To create the structure +above with 2, 15 and 100 fps frameintervals for each framesize for example you +might do: + +.. code-block:: bash + + create_frame() { + # Example usage: + # create_frame + + WIDTH=$1 + HEIGHT=$2 + FORMAT=$3 + NAME=$4 + + wdir=$FUNCTION/streaming/$FORMAT/$NAME/${HEIGHT}p + + mkdir -p $wdir + echo $WIDTH > $wdir/wWidth + echo $HEIGHT > $wdir/wHeight + echo $(( $WIDTH * $HEIGHT * 2 )) > $wdir/dwMaxVideoFrameBufferSize + cat < $wdir/dwFrameInterval + 666666 + 100000 + 5000000 + EOF + } + + create_frame 1280 720 mjpeg mjpeg + create_frame 1920 1080 mjpeg mjpeg + create_frame 1280 720 uncompressed yuyv + create_frame 1920 1080 uncompressed yuyv + +The only uncompressed format currently supported is YUYV, which is detailed at +Documentation/userspace-api/media/v4l/pixfmt-packed.yuv.rst. + +Color Matching Descriptors +~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's possible to specify some colometry information for each format you create. +This step is optional, and default information will be included if this step is +skipped; those default values follow those defined in the Color Matching Descriptor +section of the UVC specification. + +To create a Color Matching Descriptor, create a configfs item and set its three +attributes to your desired settings and then link to it from the format you wish +it to be associated with: + +.. code-block:: bash + + # Create a new Color Matching Descriptor + + mkdir $FUNCTION/streaming/color_matching/yuyv + pushd $FUNCTION/streaming/color_matching/yuyv + + echo 1 > bColorPrimaries + echo 1 > bTransferCharacteristics + echo 4 > bMatrixCoefficients + + popd + + # Create a symlink to the Color Matching Descriptor from the format's config item + ln -s $FUNCTION/streaming/color_matching/yuyv $FUNCTION/streaming/uncompressed/yuyv + +For details about the valid values, consult the UVC specification. Note that a +default color matching descriptor exists and is used by any format which does +not have a link to a different Color Matching Descriptor. It's possible to +change the attribute settings for the default descriptor, so bear in mind that if +you do that you are altering the defaults for any format that does not link to +a different one. + + +Header linking +~~~~~~~~~~~~~~ + +The UVC specification requires that Format and Frame descriptors be preceded by +Headers detailing things such as the number and cumulative size of the different +Format descriptors that follow. This and similar operations are acheived in +configfs by linking between the configfs item representing the header and the +config items representing those other descriptors, in this manner: + +.. code-block:: bash + + mkdir $FUNCTION/streaming/header/h + + # This section links the format descriptors and their associated frames + # to the header + cd $FUNCTION/streaming/header/h + ln -s ../../uncompressed/yuyv + ln -s ../../mjpeg/mjpeg + + # This section ensures that the header will be transmitted for each + # speed's set of descriptors. If support for a particular speed is not + # needed then it can be skipped here. + cd ../../class/fs + ln -s ../../header/h + cd ../../class/hs + ln -s ../../header/h + cd ../../class/ss + ln -s ../../header/h + cd ../../../control + mkdir header/h + ln -s header/h class/fs + ln -s header/h class/ss + + +Extension Unit Support +~~~~~~~~~~~~~~~~~~~~~~ + +A UVC Extension Unit (XU) basically provides a distinct unit to which control set +and get requests can be addressed. The meaning of those control requests is +entirely implementation dependent, but may be used to control settings outside +of the UVC specification (for example enabling or disabling video effects). An +XU can be inserted into the UVC unit chain or left free-hanging. + +Configuring an extension unit involves creating an entry in the appropriate +directory and setting its attributes appropriately, like so: + +.. code-block:: bash + + mkdir $FUNCTION/control/extensions/xu.0 + pushd $FUNCTION/control/extensions/xu.0 + + # Set the bUnitID of the Processing Unit as the source for this + # Extension Unit + echo 2 > baSourceID + + # Set this XU as the source of the default output terminal. This inserts + # the XU into the UVC chain between the PU and OT such that the final + # chain is IT > PU > XU.0 > OT + cat bUnitID > ../../terminal/output/default/baSourceID + + # Flag some controls as being available for use. The bmControl field is + # a bitmap with each bit denoting the availability of a particular + # control. For example to flag the 0th, 2nd and 3rd controls available: + echo 0x0d > bmControls + + # Set the GUID; this is a vendor-specific code identifying the XU. + echo -e -n "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" > guidExtensionCode + + popd + +The bmControls attribute and the baSourceID attribute are multi-value attributes. +This means that you may write multiple newline separated values to them. For +example to flag the 1st, 2nd, 9th and 10th controls as being available you would +need to write two values to bmControls, like so: + +.. code-block:: bash + + cat << EOF > bmControls + 0x03 + 0x03 + EOF + +The multi-value nature of the baSourceID attribute belies the fact that XUs can +be multiple-input, though note that this currently has no significant effect. + +The bControlSize attribute reflects the size of the bmControls attribute, and +similarly bNrInPins reflects the size of the baSourceID attributes. Both +attributes are automatically increased / decreased as you set bmControls and +baSourceID. It is also possible to manually increase or decrease bControlSize +which has the effect of truncating entries to the new size, or padding entries +out with 0x00, for example: + +:: + + $ cat bmControls + 0x03 + 0x05 + + $ cat bControlSize + 2 + + $ echo 1 > bControlSize + $ cat bmControls + 0x03 + + $ echo 2 > bControlSize + $ cat bmControls + 0x03 + 0x00 + +bNrInPins and baSourceID function in the same way. + +Custom Strings Support +~~~~~~~~~~~~~~~~~~~~~~ + +String descriptors that provide a textual description for various parts of a +USB device can be defined in the usual place within USB configfs, and may then +be linked to from the UVC function root or from Extension Unit directories to +assign those strings as descriptors: + +.. code-block:: bash + + # Create a string descriptor in us-EN and link to it from the function + # root. The name of the link is significant here, as it declares this + # descriptor to be intended for the Interface Association Descriptor. + # Other significant link names at function root are vs0_desc and vs1_desc + # For the VideoStreaming Interface 0/1 Descriptors. + + mkdir -p $GADGET/strings/0x409/iad_desc + echo -n "Interface Associaton Descriptor" > $GADGET/strings/0x409/iad_desc/s + ln -s $GADGET/strings/0x409/iad_desc $FUNCTION/iad_desc + + # Because the link to a String Descriptor from an Extension Unit clearly + # associates the two, the name of this link is not significant and may + # be set freely. + + mkdir -p $GADGET/strings/0x409/xu.0 + echo -n "A Very Useful Extension Unit" > $GADGET/strings/0x409/xu.0/s + ln -s $GADGET/strings/0x409/xu.0 $FUNCTION/control/extensions/xu.0 + +The interrupt endpoint +~~~~~~~~~~~~~~~~~~~~~~ + +The VideoControl interface has an optional interrupt endpoint which is by default +disabled. This is intended to support delayed response control set requests for +UVC (which should respond through the interrupt endpoint rather than tying up +endpoint 0). At present support for sending data through this endpoint is missing +and so it is left disabled to avoid confusion. If you wish to enable it you can +do so through the configfs attribute: + +.. code-block:: bash + + echo 1 > $FUNCTION/control/enable_interrupt_ep + +Bandwidth configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +There are three attributes which control the bandwidth of the USB connection. +These live in the function root and can be set within limits: + +.. code-block:: bash + + # streaming_interval sets bInterval. Values range from 1..255 + echo 1 > $FUNCTION/streaming_interval + + # streaming_maxpacket sets wMaxPacketSize. Valid values are 1024/2048/3072 + echo 3072 > $FUNCTION/streaming_maxpacket + + # streaming_maxburst sets bMaxBurst. Valid values are 1..15 + echo 1 > $FUNCTION/streaming_maxburst + + +The values passed here will be clamped to valid values according to the UVC +specification (which depend on the speed of the USB connection). To understand +how the settings influence bandwidth you should consult the UVC specifications, +but a rule of thumb is that increasing the streaming_maxpacket setting will +improve bandwidth (and thus the maximum possible framerate), whilst the same is +true for streaming_maxburst provided the USB connection is running at SuperSpeed. +Increasing streaming_interval will reduce bandwidth and framerate. + +The userspace application +------------------------- +By itself, the UVC Gadget driver cannot do anything particularly interesting. It +must be paired with a userspace program that responds to UVC control requests and +fills buffers to be queued to the V4L2 device that the driver creates. How those +things are achieved is implementation dependent and beyond the scope of this +document, but a reference application can be found at https://gitlab.freedesktop.org/camera/uvc-gadget diff --git a/Documentation/usb/index.rst b/Documentation/usb/index.rst index b656c9be23ed..27955dad95e1 100644 --- a/Documentation/usb/index.rst +++ b/Documentation/usb/index.rst @@ -16,6 +16,7 @@ USB support gadget_multi gadget_printer gadget_serial + gadget_uvc gadget-testing iuu_phoenix mass-storage -- cgit From 11440da77d6020831ee6f9ce4551b545dea789ee Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Mon, 6 Mar 2023 17:27:51 +0100 Subject: mmc: sdhci_am654: lower power-on failed message severity Lower the power-on failed message severity from warn to info when the controller does not power-up. It's normal to have this situation when the SD card slot is empty, therefore we should not warn the user about it. Fixes: 7ca0f166f5b2 ("mmc: sdhci_am654: Add workaround for card detect debounce timer") Signed-off-by: Francesco Dolcini Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230306162751.163369-1-francesco@dolcini.it Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci_am654.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index 7ef828942df3..89953093e20c 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -369,7 +369,7 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg) MAX_POWER_ON_TIMEOUT, false, host, val, reg); if (ret) - dev_warn(mmc_dev(host->mmc), "Power on failed\n"); + dev_info(mmc_dev(host->mmc), "Power on failed\n"); } } -- cgit From 92771cdd90de64b15e65f3c88d6c6199bd5f33f5 Mon Sep 17 00:00:00 2001 From: William Qiu Date: Tue, 7 Mar 2023 10:46:46 +0800 Subject: mmc: dw_mmc-starfive: Fix initialization of prev_err Fix a bug by making sure prev_err doesn't get used when being uninitialized. Signed-off-by: William Qiu Reported-by: Dan Carpenter Reviewed-by: Emil Renner Berthing Fixes: 9e622229bbf4 ("mmc: starfive: Add sdio/emmc driver support") Link: https://lore.kernel.org/r/20230307024646.10216-3-william.qiu@starfivetech.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-starfive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/dw_mmc-starfive.c b/drivers/mmc/host/dw_mmc-starfive.c index 40f5969b07a6..dab1508bf83c 100644 --- a/drivers/mmc/host/dw_mmc-starfive.c +++ b/drivers/mmc/host/dw_mmc-starfive.c @@ -51,7 +51,7 @@ static int dw_mci_starfive_execute_tuning(struct dw_mci_slot *slot, struct dw_mci *host = slot->host; struct starfive_priv *priv = host->priv; int rise_point = -1, fall_point = -1; - int err, prev_err; + int err, prev_err = 0; int i; bool found = 0; u32 regval; -- cgit From 82f5332d3b9872ab5b287e85c57b76d8bb640cd1 Mon Sep 17 00:00:00 2001 From: Ziyang Huang Date: Tue, 21 Feb 2023 18:30:04 +0800 Subject: usb: dwc2: drd: fix inconsistent mode if role-switch-default-mode="host" Some boards might use USB-A female connector for USB ports, however, the port could be connected to a dual-mode USB controller, making it also behaves as a peripheral device if male-to-male cable is connected. In this case, the dts looks like this: &usb0 { status = "okay"; dr_mode = "otg"; usb-role-switch; role-switch-default-mode = "host"; }; After boot, dwc2_ovr_init() sets GOTGCTL to GOTGCTL_AVALOVAL and call dwc2_force_mode() with parameter host=false, which causes inconsistent mode - The hardware is in peripheral mode while the kernel status is in host mode. What we can do now is to call dwc2_drd_role_sw_set() to switch to device mode, and everything should work just fine now, even switching back to none(default) mode afterwards. Fixes: e14acb876985 ("usb: dwc2: drd: add role-switch-default-node support") Cc: stable Signed-off-by: Ziyang Huang Tested-by: Fabrice Gasnier Acked-by: Minas Harutyunyan Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/SG2PR01MB204837BF68EDB0E343D2A375C9A59@SG2PR01MB2048.apcprd01.prod.exchangelabs.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/drd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/drd.c b/drivers/usb/dwc2/drd.c index d8d6493bc457..a8605b02115b 100644 --- a/drivers/usb/dwc2/drd.c +++ b/drivers/usb/dwc2/drd.c @@ -35,7 +35,8 @@ static void dwc2_ovr_init(struct dwc2_hsotg *hsotg) spin_unlock_irqrestore(&hsotg->lock, flags); - dwc2_force_mode(hsotg, (hsotg->dr_mode == USB_DR_MODE_HOST)); + dwc2_force_mode(hsotg, (hsotg->dr_mode == USB_DR_MODE_HOST) || + (hsotg->role_sw_default_mode == USB_DR_MODE_HOST)); } static int dwc2_ovr_avalid(struct dwc2_hsotg *hsotg, bool valid) -- cgit From d9a02e016aaf5a57fb44e9a5e6da8ccd3b9e2e70 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 8 Mar 2023 14:39:54 -0500 Subject: dm crypt: avoid accessing uninitialized tasklet When neither "no_read_workqueue" nor "no_write_workqueue" are enabled, tasklet_trylock() in crypt_dec_pending() may still return false due to an uninitialized state, and dm-crypt will unnecessarily do io completion in io_queue workqueue instead of current context. Fix this by adding an 'in_tasklet' flag to dm_crypt_io struct and initialize it to false in crypt_io_init(). Set this flag to true in kcryptd_queue_crypt() before calling tasklet_schedule(). If set crypt_dec_pending() will punt io completion to a workqueue. This also nicely avoids the tasklet_trylock/unlock hack when tasklets aren't in use. Fixes: 8e14f610159d ("dm crypt: do not call bio_endio() from the dm-crypt tasklet") Cc: stable@vger.kernel.org Reported-by: Hou Tao Suggested-by: Ignat Korchagin Reviewed-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index faba1be572f9..2764b4ea18a3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -72,7 +72,9 @@ struct dm_crypt_io { struct crypt_config *cc; struct bio *base_bio; u8 *integrity_metadata; - bool integrity_metadata_from_pool; + bool integrity_metadata_from_pool:1; + bool in_tasklet:1; + struct work_struct work; struct tasklet_struct tasklet; @@ -1731,6 +1733,7 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; + io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1777,14 +1780,13 @@ static void crypt_dec_pending(struct dm_crypt_io *io) * our tasklet. In this case we need to delay bio_endio() * execution to after the tasklet is done and dequeued. */ - if (tasklet_trylock(&io->tasklet)) { - tasklet_unlock(&io->tasklet); - bio_endio(base_bio); + if (io->in_tasklet) { + INIT_WORK(&io->work, kcryptd_io_bio_endio); + queue_work(cc->io_queue, &io->work); return; } - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); + bio_endio(base_bio); } /* @@ -2233,6 +2235,7 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * it is being executed with irqs disabled. */ if (in_hardirq() || irqs_disabled()) { + io->in_tasklet = true; tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); tasklet_schedule(&io->tasklet); return; -- cgit From 2d638be71155b2e036aca1966b6129e2d661e91f Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 26 Feb 2023 12:38:46 -0500 Subject: Revert "tty: serial: fsl_lpuart: adjust SERIAL_FSL_LPUART_CONSOLE config dependency" This reverts commit 5779a072c248db7a40cfd0f5ea958097fd1d9a30. This results in a link error of ld: drivers/tty/serial/earlycon.o: in function `parse_options': drivers/tty/serial/earlycon.c:99: undefined reference to `uart_parse_earlycon' When the config is in this state CONFIG_SERIAL_CORE=m CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_FSL_LPUART=m CONFIG_SERIAL_FSL_LPUART_CONSOLE=y Fixes: 5779a072c248 ("tty: serial: fsl_lpuart: adjust SERIAL_FSL_LPUART_CONSOLE config dependency") Cc: stable Signed-off-by: Tom Rix Reviewed-by: Randy Dunlap Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20230226173846.236691-1-trix@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 625358f44419..0072892ca7fc 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1313,7 +1313,7 @@ config SERIAL_FSL_LPUART config SERIAL_FSL_LPUART_CONSOLE bool "Console on Freescale lpuart serial port" - depends on SERIAL_FSL_LPUART + depends on SERIAL_FSL_LPUART=y select SERIAL_CORE_CONSOLE select SERIAL_EARLYCON help -- cgit From 2411fd94ceaa6e11326e95d6ebf876cbfed28d23 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Thu, 23 Feb 2023 17:39:41 +0800 Subject: tty: serial: fsl_lpuart: skip waiting for transmission complete when UARTCTRL_SBK is asserted According to LPUART RM, Transmission Complete Flag becomes 0 if queuing a break character by writing 1 to CTRL[SBK], so here need to skip waiting for transmission complete when UARTCTRL_SBK is asserted, otherwise the kernel may stuck here. And actually set_termios() adds transmission completion waiting to avoid data loss or data breakage when changing the baud rate, but we don't need to worry about this when queuing break characters. Signed-off-by: Sherry Sun Cc: stable Link: https://lore.kernel.org/r/20230223093941.31790-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index e945f41b93d4..f9e164abf920 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2240,9 +2240,15 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, /* update the per-port timeout */ uart_update_timeout(port, termios->c_cflag, baud); - /* wait transmit engin complete */ - lpuart32_write(&sport->port, 0, UARTMODIR); - lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); + /* + * LPUART Transmission Complete Flag may never be set while queuing a break + * character, so skip waiting for transmission complete when UARTCTRL_SBK is + * asserted. + */ + if (!(old_ctrl & UARTCTRL_SBK)) { + lpuart32_write(&sport->port, 0, UARTMODIR); + lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC); + } /* disable transmit and receive */ lpuart32_write(&sport->port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE), -- cgit From f8086d1a65ac693e3fd863128352b4b11ee7324d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 25 Feb 2023 21:39:53 -0800 Subject: serial: 8250: ASPEED_VUART: select REGMAP instead of depending on it REGMAP is a hidden (not user visible) symbol. Users cannot set it directly thru "make *config", so drivers should select it instead of depending on it if they need it. Consistently using "select" or "depends on" can also help reduce Kconfig circular dependency issues. Therefore, change the use of "depends on REGMAP" to "select REGMAP". Fixes: 8d310c9107a2 ("drivers/tty/serial/8250: Make Aspeed VUART SIRQ polarity configurable") Cc: stable Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: Oskar Senft Cc: linux-serial@vger.kernel.org Link: https://lore.kernel.org/r/20230226053953.4681-9-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 978dc196c29b..caeff76e58f2 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -257,8 +257,9 @@ config SERIAL_8250_ASPEED_VUART tristate "Aspeed Virtual UART" depends on SERIAL_8250 depends on OF - depends on REGMAP && MFD_SYSCON + depends on MFD_SYSCON depends on ARCH_ASPEED || COMPILE_TEST + select REGMAP help If you want to use the virtual UART (VUART) device on Aspeed BMC platforms, enable this option. This enables the 16550A- -- cgit From 32e293be736b853f168cd065d9cbc1b0c69f545d Mon Sep 17 00:00:00 2001 From: Biju Das Date: Mon, 27 Feb 2023 11:41:46 +0000 Subject: serial: 8250_em: Fix UART port type As per HW manual for EMEV2 "R19UH0040EJ0400 Rev.4.00", the UART IP found on EMMA mobile SoC is Register-compatible with the general-purpose 16750 UART chip. Fix UART port type as 16750 and enable 64-bytes fifo support. Fixes: 22886ee96895 ("serial8250-em: Emma Mobile UART driver V2") Cc: stable@vger.kernel.org Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20230227114152.22265-2-biju.das.jz@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_em.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c index f8e99995eee9..d94c3811a8f7 100644 --- a/drivers/tty/serial/8250/8250_em.c +++ b/drivers/tty/serial/8250/8250_em.c @@ -106,8 +106,8 @@ static int serial8250_em_probe(struct platform_device *pdev) memset(&up, 0, sizeof(up)); up.port.mapbase = regs->start; up.port.irq = irq; - up.port.type = PORT_UNKNOWN; - up.port.flags = UPF_BOOT_AUTOCONF | UPF_FIXED_PORT | UPF_IOREMAP; + up.port.type = PORT_16750; + up.port.flags = UPF_FIXED_PORT | UPF_IOREMAP | UPF_FIXED_TYPE; up.port.dev = &pdev->dev; up.port.private_data = priv; -- cgit From 6e01f9a594ee0f69fb52cc8d11971612b4817f0b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Feb 2023 09:50:46 +0100 Subject: serial: 8250_fsl: fix handle_irq locking The 8250 handle_irq callback is not just called from the interrupt handler but also from a timer callback when polling (e.g. for ports without an interrupt line). Consequently the callback must explicitly disable interrupts to avoid a potential deadlock with another interrupt in polled mode. Fix up the two paths in the freescale callback that failed to re-enable interrupts when polling. Fixes: 853a9ae29e97 ("serial: 8250: fix handle_irq locking") Cc: stable@vger.kernel.org # 5.13 Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/Y/xYzqp4ogmOF5t0@kili Signed-off-by: Johan Hovold Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20230227085046.24282-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fsl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 8aad15622a2e..8adfaa183f77 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -34,7 +34,7 @@ int fsl8250_handle_irq(struct uart_port *port) iir = port->serial_in(port, UART_IIR); if (iir & UART_IIR_NO_INT) { - spin_unlock(&up->port.lock); + spin_unlock_irqrestore(&up->port.lock, flags); return 0; } @@ -42,7 +42,7 @@ int fsl8250_handle_irq(struct uart_port *port) if (unlikely(up->lsr_saved_flags & UART_LSR_BI)) { up->lsr_saved_flags &= ~UART_LSR_BI; port->serial_in(port, UART_RX); - spin_unlock(&up->port.lock); + spin_unlock_irqrestore(&up->port.lock, flags); return 1; } -- cgit From 5d943b5d69c032de7ce9cd625ac083a5c277b9c5 Mon Sep 17 00:00:00 2001 From: Kumaravel Thiagarajan Date: Sun, 5 Mar 2023 20:21:24 +0530 Subject: serial: 8250_pci1xxxx: Disable SERIAL_8250_PCI1XXXX config by default Commit 32bb477fa7bf ("serial: 8250_pci1xxxx: Add driver for quad-uart support") made the SERIAL_8250_PCI1XXXX driver enabled when SERIAL_8250 is enabled, disable it as this driver does not need to be enabled by default Fixes: 32bb477fa7bf ("serial: 8250_pci1xxxx: Add driver for quad-uart support") Reported-by: Linus Torvalds Link: https://lore.kernel.org/lkml/CAHk-=whhFCeeuo6vTEmNSx6S-KKkugxgzN_W5Z6v-9yH9gc3Zw@mail.gmail.com/ Signed-off-by: Kumaravel Thiagarajan Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230305145124.13444-1-kumaravel.thiagarajan@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index caeff76e58f2..5313aa31930f 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -300,7 +300,6 @@ config SERIAL_8250_PCI1XXXX tristate "Microchip 8250 based serial port" depends on SERIAL_8250 && PCI select SERIAL_8250_PCILIB - default SERIAL_8250 help Select this option if you have a setup with Microchip PCIe Switch with serial port enabled and wish to enable 8250 -- cgit From 1be6f2b15f902c02e055ae0b419ca789200473c9 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 9 Mar 2023 14:43:02 +0100 Subject: tty: serial: fsl_lpuart: fix race on RX DMA shutdown From time to time DMA completion can come in the middle of DMA shutdown: : : lpuart32_shutdown() lpuart_dma_shutdown() del_timer_sync() lpuart_dma_rx_complete() lpuart_copy_rx_to_tty() mod_timer() lpuart_dma_rx_free() When the timer fires a bit later, sport->dma_rx_desc is NULL: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 pc : lpuart_copy_rx_to_tty+0xcc/0x5bc lr : lpuart_timer_func+0x1c/0x2c Call trace: lpuart_copy_rx_to_tty lpuart_timer_func call_timer_fn __run_timers.part.0 run_timer_softirq __do_softirq __irq_exit_rcu irq_exit handle_domain_irq gic_handle_irq call_on_irq_stack do_interrupt_handler ... To fix this fold del_timer_sync() into lpuart_dma_rx_free() after dmaengine_terminate_sync() to make sure timer will not be re-started in lpuart_copy_rx_to_tty() <= lpuart_dma_rx_complete(). Fixes: 4a8588a1cf86 ("serial: fsl_lpuart: delete timer on shutdown") Cc: stable Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20230309134302.74940-2-alexander.sverdlin@siemens.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index f9e164abf920..56e6ba3250cd 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1354,6 +1354,7 @@ static void lpuart_dma_rx_free(struct uart_port *port) struct dma_chan *chan = sport->dma_rx_chan; dmaengine_terminate_sync(chan); + del_timer_sync(&sport->lpuart_timer); dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE); kfree(sport->rx_ring.buf); sport->rx_ring.tail = 0; @@ -1813,7 +1814,6 @@ static int lpuart32_startup(struct uart_port *port) static void lpuart_dma_shutdown(struct lpuart_port *sport) { if (sport->lpuart_dma_rx_use) { - del_timer_sync(&sport->lpuart_timer); lpuart_dma_rx_free(&sport->port); sport->lpuart_dma_rx_use = false; } @@ -1973,10 +1973,8 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, * Since timer function acqures sport->port.lock, need to stop before * acquring same lock because otherwise del_timer_sync() can deadlock. */ - if (old && sport->lpuart_dma_rx_use) { - del_timer_sync(&sport->lpuart_timer); + if (old && sport->lpuart_dma_rx_use) lpuart_dma_rx_free(&sport->port); - } spin_lock_irqsave(&sport->port.lock, flags); @@ -2210,10 +2208,8 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, * Since timer function acqures sport->port.lock, need to stop before * acquring same lock because otherwise del_timer_sync() can deadlock. */ - if (old && sport->lpuart_dma_rx_use) { - del_timer_sync(&sport->lpuart_timer); + if (old && sport->lpuart_dma_rx_use) lpuart_dma_rx_free(&sport->port); - } spin_lock_irqsave(&sport->port.lock, flags); @@ -3020,7 +3016,6 @@ static int lpuart_suspend(struct device *dev) * cannot resume as expected, hence gracefully release the * Rx DMA path before suspend and start Rx DMA path on resume. */ - del_timer_sync(&sport->lpuart_timer); lpuart_dma_rx_free(&sport->port); /* Disable Rx DMA to use UART port as wakeup source */ -- cgit From 1a5ecc73b2bfeffe036212d4a6bfacee053ab0a1 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Mar 2023 18:35:09 -0800 Subject: serdev: Set fwnode for serdev devices This allow fw_devlink to do dependency tracking for serdev devices. Reported-by: Florian Fainelli Link: https://lore.kernel.org/lkml/03b70a8a-0591-f28b-a567-9d2f736f17e5@gmail.com/ Cc: Stefan Wahren Signed-off-by: Saravana Kannan Tested-by: Stefan Wahren Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230302023509.319903-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index aa80de3a8194..678014253b7b 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -534,7 +534,7 @@ static int of_serdev_register_devices(struct serdev_controller *ctrl) if (!serdev) continue; - serdev->dev.of_node = node; + device_set_node(&serdev->dev, of_fwnode_handle(node)); err = serdev_device_add(serdev); if (err) { -- cgit From 9aff74cc4e9eb841dde5fd009ed7ddca5db40e68 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2023 17:44:02 +0100 Subject: serial: qcom-geni: fix console shutdown hang A recent commit added back the calls top stop tx and rx to shutdown() which had previously been removed by commit e83766334f96 ("tty: serial: qcom_geni_serial: No need to stop tx/rx on UART shutdown") in order to be able to use kgdb after stopping the getty. Not only did this again break kgdb, but it also broke serial consoles more generally by hanging TX when stopping the getty during reboot. The underlying problem has been there since the driver was first merged and fixing it is going to be a bit involved so simply stop calling the broken stop functions during shutdown for consoles for now. Fixes: d8aca2f96813 ("tty: serial: qcom-geni-serial: stop operations in progress at shutdown") Cc: stable Cc: Bartosz Golaszewski Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Reviewed-by: Srinivas Kandagatla Tested-by: Srinivas Kandagatla Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney # sa8540p-ride Link: https://lore.kernel.org/r/20230307164405.14218-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index d69592e5e2ec..11da05d8f848 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1070,6 +1070,10 @@ static int setup_fifos(struct qcom_geni_serial_port *port) static void qcom_geni_serial_shutdown(struct uart_port *uport) { disable_irq(uport->irq); + + if (uart_console(uport)) + return; + qcom_geni_serial_stop_tx(uport); qcom_geni_serial_stop_rx(uport); } -- cgit From 95fcfc08537763bff21ec8c450d3d3cb1a60ad09 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2023 17:44:03 +0100 Subject: serial: qcom-geni: fix DMA mapping leak on shutdown Fix what appears to be a copy-paste error that can lead to a leaked DMA mapping on close() and failure to restart TX after the port is reopened. Note that rx_dma_addr is generally NULL when qcom_geni_serial_stop_tx_dma() is called as part of shutdown() (but tx_dma_addr need not be). Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA") Cc: stable Cc: Bartosz Golaszewski Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Reviewed-by: Srinivas Kandagatla Tested-by: Srinivas Kandagatla Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney # sa8540p-ride Link: https://lore.kernel.org/r/20230307164405.14218-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 11da05d8f848..2aa3872e6283 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -596,7 +596,7 @@ static void qcom_geni_serial_stop_tx_dma(struct uart_port *uport) if (!qcom_geni_serial_main_active(uport)) return; - if (port->rx_dma_addr) { + if (port->tx_dma_addr) { geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr, port->tx_remaining); port->tx_dma_addr = 0; -- cgit From 97820780b723197d1b472f2bd39fd8593b5d4edc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2023 17:44:04 +0100 Subject: serial: qcom-geni: fix mapping of empty DMA buffer Make sure that there is data in the ring buffer before trying to set up a zero-length DMA transfer. This specifically fixes the following warning when unmapping the empty buffer on the sc8280xp-crd: WARNING: CPU: 0 PID: 138 at drivers/iommu/dma-iommu.c:1046 iommu_dma_unmap_page+0xbc/0xd8 ... Call trace: iommu_dma_unmap_page+0xbc/0xd8 dma_unmap_page_attrs+0x30/0x1c8 geni_se_tx_dma_unprep+0x28/0x38 qcom_geni_serial_isr+0x358/0x75c Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA") Cc: stable Cc: Bartosz Golaszewski Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Reviewed-by: Srinivas Kandagatla Tested-by: Srinivas Kandagatla Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney # sa8540p-ride Link: https://lore.kernel.org/r/20230307164405.14218-4-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 2aa3872e6283..9871225b2f9b 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -631,6 +631,9 @@ static void qcom_geni_serial_start_tx_dma(struct uart_port *uport) if (port->tx_dma_addr) return; + if (uart_circ_empty(xmit)) + return; + xmit_size = uart_circ_chars_pending(xmit); if (xmit_size < WAKEUP_CHARS) uart_write_wakeup(uport); -- cgit From b6a7bac184472b5b79286a71a61c2f16ea4e86ad Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2023 17:44:05 +0100 Subject: serial: qcom-geni: drop bogus uart_write_wakeup() Drop the bogus uart_write_wakeup() from when setting up a new DMA transfer, which does not free up any more space in the ring buffer. Any pending writers will be woken up when the transfer completes. Cc: stable Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Reviewed-by: Srinivas Kandagatla Tested-by: Srinivas Kandagatla Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney # sa8540p-ride Link: https://lore.kernel.org/r/20230307164405.14218-5-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 9871225b2f9b..28fbc927a546 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -634,10 +634,6 @@ static void qcom_geni_serial_start_tx_dma(struct uart_port *uport) if (uart_circ_empty(xmit)) return; - xmit_size = uart_circ_chars_pending(xmit); - if (xmit_size < WAKEUP_CHARS) - uart_write_wakeup(uport); - xmit_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE); qcom_geni_serial_setup_tx(uport, xmit_size); -- cgit From 18365ebf23f3e713e5dd8e295c9a639295250f3c Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 6 Mar 2023 10:49:21 +0100 Subject: tty: vt: protect KD_FONT_OP_GET_TALL from unbound access In ioctl(KD_FONT_OP_GET_TALL), userland tells through op->height which vpitch should be used to copy over the font. In con_font_get, we were not checking that it is within the maximum height value, and thus userland could make the vc->vc_sw->con_font_get(vc, &font, vpitch); call possibly overflow the allocated max_font_size bytes, and the copy_to_user(op->data, font.data, c) call possibly read out of that allocated buffer. By checking vpitch against max_font_height, the max_font_size buffer will always be large enough for the vc->vc_sw->con_font_get(vc, &font, vpitch) call (since we already prevent loading a font larger than that), and c = (font.width+7)/8 * vpitch * font.charcount will always remain below max_font_size. Fixes: 24d69384bcd3 ("VT: Add KD_FONT_OP_SET/GET_TALL operations") Reported-by: syzbot+3af17071816b61e807ed@syzkaller.appspotmail.com Signed-off-by: Samuel Thibault Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20230306094921.tik5ewne4ft6mfpo@begin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 57a5c23b51d4..3c2ea9c098f7 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4545,6 +4545,9 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) int c; unsigned int vpitch = op->op == KD_FONT_OP_GET_TALL ? op->height : 32; + if (vpitch > max_font_height) + return -EINVAL; + if (op->data) { font.data = kvmalloc(max_font_size, GFP_KERNEL); if (!font.data) -- cgit From 38ed310c22e7a0fc978b1f8292136a4a4a8b3051 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 8 Mar 2023 14:26:02 -0800 Subject: firmware: xilinx: don't make a sleepable memory allocation from an atomic context The following issue was discovered using lockdep: [ 6.691371] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:209 [ 6.694602] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 1, name: swapper/0 [ 6.702431] 2 locks held by swapper/0/1: [ 6.706300] #0: ffffff8800f6f188 (&dev->mutex){....}-{3:3}, at: __device_driver_lock+0x4c/0x90 [ 6.714900] #1: ffffffc009a2abb8 (enable_lock){....}-{2:2}, at: clk_enable_lock+0x4c/0x140 [ 6.723156] irq event stamp: 304030 [ 6.726596] hardirqs last enabled at (304029): [] _raw_spin_unlock_irqrestore+0xc0/0xd0 [ 6.736142] hardirqs last disabled at (304030): [] clk_enable_lock+0xfc/0x140 [ 6.744742] softirqs last enabled at (303958): [] _stext+0x4f0/0x894 [ 6.752655] softirqs last disabled at (303951): [] irq_exit+0x238/0x280 [ 6.760744] CPU: 1 PID: 1 Comm: swapper/0 Tainted: G U 5.15.36 #2 [ 6.768048] Hardware name: xlnx,zynqmp (DT) [ 6.772179] Call trace: [ 6.774584] dump_backtrace+0x0/0x300 [ 6.778197] show_stack+0x18/0x30 [ 6.781465] dump_stack_lvl+0xb8/0xec [ 6.785077] dump_stack+0x1c/0x38 [ 6.788345] ___might_sleep+0x1a8/0x2a0 [ 6.792129] __might_sleep+0x6c/0xd0 [ 6.795655] kmem_cache_alloc_trace+0x270/0x3d0 [ 6.800127] do_feature_check_call+0x100/0x220 [ 6.804513] zynqmp_pm_invoke_fn+0x8c/0xb0 [ 6.808555] zynqmp_pm_clock_getstate+0x90/0xe0 [ 6.813027] zynqmp_pll_is_enabled+0x8c/0x120 [ 6.817327] zynqmp_pll_enable+0x38/0xc0 [ 6.821197] clk_core_enable+0x144/0x400 [ 6.825067] clk_core_enable+0xd4/0x400 [ 6.828851] clk_core_enable+0xd4/0x400 [ 6.832635] clk_core_enable+0xd4/0x400 [ 6.836419] clk_core_enable+0xd4/0x400 [ 6.840203] clk_core_enable+0xd4/0x400 [ 6.843987] clk_core_enable+0xd4/0x400 [ 6.847771] clk_core_enable+0xd4/0x400 [ 6.851555] clk_core_enable_lock+0x24/0x50 [ 6.855683] clk_enable+0x24/0x40 [ 6.858952] fclk_probe+0x84/0xf0 [ 6.862220] platform_probe+0x8c/0x110 [ 6.865918] really_probe+0x110/0x5f0 [ 6.869530] __driver_probe_device+0xcc/0x210 [ 6.873830] driver_probe_device+0x64/0x140 [ 6.877958] __driver_attach+0x114/0x1f0 [ 6.881828] bus_for_each_dev+0xe8/0x160 [ 6.885698] driver_attach+0x34/0x50 [ 6.889224] bus_add_driver+0x228/0x300 [ 6.893008] driver_register+0xc0/0x1e0 [ 6.896792] __platform_driver_register+0x44/0x60 [ 6.901436] fclk_driver_init+0x1c/0x28 [ 6.905220] do_one_initcall+0x104/0x590 [ 6.909091] kernel_init_freeable+0x254/0x2bc [ 6.913390] kernel_init+0x24/0x130 [ 6.916831] ret_from_fork+0x10/0x20 Fix it by passing the GFP_ATOMIC gfp flag for the corresponding memory allocation. Fixes: acfdd18591ea ("firmware: xilinx: Use hash-table for api feature check") Cc: stable Signed-off-by: Roman Gushchin Cc: Amit Sunil Dhamne Cc: Michal Simek Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20230308222602.123866-1-roman.gushchin@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/xilinx/zynqmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index acd83d29c866..ce86a1850305 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -206,7 +206,7 @@ static int do_feature_check_call(const u32 api_id) } /* Add new entry if not present */ - feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL); + feature_data = kmalloc(sizeof(*feature_data), GFP_ATOMIC); if (!feature_data) return -ENOMEM; -- cgit From ecd240875e877d78fd03efbc62292f550872df3f Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 8 Mar 2023 22:06:03 +0100 Subject: ARM: dts: qcom: apq8026-lg-lenok: add missing reserved memory Turns out these two memory regions also need to be avoided, otherwise weird things will happen when Linux tries to use this memory. Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230308-lenok-reserved-memory-v1-1-b8bf6ff01207@z3ntu.xyz --- arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts index de2fb1c01b6e..b82381229adf 100644 --- a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts +++ b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts @@ -27,6 +27,16 @@ }; reserved-memory { + sbl_region: sbl@2f00000 { + reg = <0x02f00000 0x100000>; + no-map; + }; + + external_image_region: external-image@3100000 { + reg = <0x03100000 0x200000>; + no-map; + }; + adsp_region: adsp@3300000 { reg = <0x03300000 0x1400000>; no-map; -- cgit From 6df6fab9320bc9ebdf50136a01e7bf0ee5984c62 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 8 Mar 2023 13:31:29 +0100 Subject: arm64: dts: qcom: sm8450: correct WSA2 assigned clocks The WSA2 assigned-clocks were copied from WSA, but the WSA2 uses its own. Fixes: 14341e76dbc7 ("arm64: dts: qcom: sm8450: add Soundwire and LPASS") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230308123129.232642-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 1a744a33bcf4..e77d188508fd 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -2143,8 +2143,8 @@ <&q6prmcc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>, <&vamacro>; clock-names = "mclk", "npl", "macro", "dcodec", "fsgen"; - assigned-clocks = <&q6prmcc LPASS_CLK_ID_WSA_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, - <&q6prmcc LPASS_CLK_ID_WSA_CORE_TX_2X_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>; + assigned-clocks = <&q6prmcc LPASS_CLK_ID_WSA2_CORE_TX_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>, + <&q6prmcc LPASS_CLK_ID_WSA2_CORE_TX_2X_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>; assigned-clock-rates = <19200000>, <19200000>; #clock-cells = <0>; -- cgit From 670b7d6569bf439c90d7aac48ec36ee3e3013754 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:38 +0000 Subject: arm64: dts: qcom: sc8280xp: fix rx frame shapping info Some of the SoundWire frameshapping data seems incorrect, fix these values. Fixes: 1749a8ae49a3 ("arm64: dts: qcom: sc8280xp: add SoundWire and LPASS") Signed-off-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-2-srinivas.kandagatla@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 0d02599d8867..906cbd81d268 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -2504,12 +2504,12 @@ qcom,ports-sinterval-low = /bits/ 8 <0x03 0x1f 0x1f 0x07 0x00>; qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0B 0x01 0x00>; qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0B 0x00 0x00>; - qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff>; - qcom,ports-hstop = /bits/ 8 <0xff 0x06 0xff 0xff 0xff>; + qcom,ports-hstart = /bits/ 8 <0xff 0x03 0x00 0xff 0xff>; + qcom,ports-hstop = /bits/ 8 <0xff 0x06 0x0f 0xff 0xff>; qcom,ports-word-length = /bits/ 8 <0x01 0x07 0x04 0xff 0xff>; - qcom,ports-block-pack-mode = /bits/ 8 <0xff 0x00 0x01 0xff 0xff>; + qcom,ports-block-pack-mode = /bits/ 8 <0xff 0xff 0x01 0xff 0xff>; qcom,ports-lane-control = /bits/ 8 <0x01 0x00 0x00 0x00 0x00>; - qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0x00>; + qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff 0xff>; #sound-dai-cells = <1>; #address-cells = <2>; @@ -2609,15 +2609,15 @@ qcom,din-ports = <4>; qcom,dout-ports = <0>; - qcom,ports-sinterval-low = /bits/ 8 <0x01 0x03 0x03 0x03>; - qcom,ports-offset1 = /bits/ 8 <0x01 0x00 0x02 0x01>; + qcom,ports-sinterval-low = /bits/ 8 <0x01 0x01 0x03 0x03>; + qcom,ports-offset1 = /bits/ 8 <0x01 0x00 0x02 0x00>; qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x00 0x00>; qcom,ports-block-pack-mode = /bits/ 8 <0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff>; qcom,ports-hstop = /bits/ 8 <0xff 0xff 0xff 0xff>; - qcom,ports-word-length = /bits/ 8 <0xff 0x00 0xff 0xff>; + qcom,ports-word-length = /bits/ 8 <0xff 0xff 0xff 0xff>; qcom,ports-block-group-count = /bits/ 8 <0xff 0xff 0xff 0xff>; - qcom,ports-lane-control = /bits/ 8 <0x00 0x01 0x00 0x00>; + qcom,ports-lane-control = /bits/ 8 <0x00 0x01 0x00 0x01>; status = "disabled"; }; -- cgit From e43bd22cb377bf4c4e5b12daacaf02f5c24fbb16 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:39 +0000 Subject: arm64: dts: qcom: sc8280xp: fix lpass tx macro clocks Tx macro soundwire clock is for some reason is incorrectly assigned to va macro, fix this and use tx macro clock instead. Fixes: 1749a8ae49a3 ("arm64: dts: qcom: sc8280xp: add SoundWire and LPASS") Signed-off-by: Srinivas Kandagatla Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-3-srinivas.kandagatla@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 906cbd81d268..42bfa9fa5b96 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -2600,7 +2600,7 @@ <&intc GIC_SPI 520 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "core", "wake"; - clocks = <&vamacro>; + clocks = <&txmacro>; clock-names = "iface"; label = "TX"; #sound-dai-cells = <1>; -- cgit From 4def7aa377ba1dbe66335ca3ebe3aa5a5bc3fe67 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:40 +0000 Subject: arm64: dts: qcom: sc8280xp-x13s: fix dmic sample rate The version of dmic that is on X13s panel supports clock frequency of range 1 Mhz to 4.8 MHz for normal operation. So correct the existing node to reflect this. Fixes: 8c1ea87e80b4 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Signed-off-by: Srinivas Kandagatla Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-4-srinivas.kandagatla@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 96b36ce94ce0..7d076b9e85bf 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -1075,7 +1075,7 @@ vdd-micb-supply = <&vreg_s10b>; - qcom,dmic-sample-rate = <600000>; + qcom,dmic-sample-rate = <4800000>; status = "okay"; }; -- cgit From 2e498f35c385654396e94cf12e097522d3973d41 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 2 Mar 2023 11:57:41 +0000 Subject: arm64: dts: qcom: sc8280xp-x13s: fix va dmic dai links and routing VA dmics 0, 1, 2 micbias on X13s are connected to WCD MICBIAS1, WCD MICBIAS1 and WCD MICBIAS3 respectively. Reflect this in dt to get dmics working. Also fix dmics to go via VA Macro instead of TX macro to fix device switching. Fixes: 8c1ea87e80b4 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Signed-off-by: Srinivas Kandagatla Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302115741.7726-5-srinivas.kandagatla@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 7d076b9e85bf..fa412bea8985 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -897,9 +897,9 @@ "VA DMIC0", "MIC BIAS1", "VA DMIC1", "MIC BIAS1", "VA DMIC2", "MIC BIAS3", - "TX DMIC0", "MIC BIAS1", - "TX DMIC1", "MIC BIAS2", - "TX DMIC2", "MIC BIAS3", + "VA DMIC0", "VA MIC BIAS1", + "VA DMIC1", "VA MIC BIAS1", + "VA DMIC2", "VA MIC BIAS3", "TX SWR_ADC1", "ADC2_OUTPUT"; wcd-playback-dai-link { @@ -950,7 +950,7 @@ va-dai-link { link-name = "VA Capture"; cpu { - sound-dai = <&q6apmbedai TX_CODEC_DMA_TX_3>; + sound-dai = <&q6apmbedai VA_CODEC_DMA_TX_0>; }; platform { -- cgit From a5982b3971007161b423b39aa843bdb6713a9d44 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 2 Mar 2023 16:47:24 +0100 Subject: arm64: dts: qcom: sm8550: fix LPASS pinctrl slew base address The second LPASS pin controller IO address is supposed to be the MCC range which contains the slew rate registers. The Linux driver then accesses slew rate register with hard-coded offset (0xa000). However the DTS contained the address of slew rate register as the second IO address, thus any reads were effectively pass the memory space and lead to "Internal error: synchronous external aborts" when applying pin configuration. Fixes: 6de7f9c34358 ("arm64: dts: qcom: sm8550: add GPR and LPASS pin controller") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230302154724.856062-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 25f51245fe9b..24aa724c12ea 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -1997,7 +1997,7 @@ lpass_tlmm: pinctrl@6e80000 { compatible = "qcom,sm8550-lpass-lpi-pinctrl"; reg = <0 0x06e80000 0 0x20000>, - <0 0x0725a000 0 0x10000>; + <0 0x07250000 0 0x10000>; gpio-controller; #gpio-cells = <2>; gpio-ranges = <&lpass_tlmm 0 0 23>; -- cgit From e607b3c1fa0e1579951acd00f9559a77f97d0927 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 7 Mar 2023 21:02:00 +0530 Subject: arm64: dts: qcom: sm8350: Mark UFS controller as cache coherent The UFS controller on SM8350 supports cache coherency, hence add the "dma-coherent" property to mark it as such. Fixes: 59c7cf814783 ("arm64: dts: qcom: sm8350: Add UFS nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230307153201.180626-1-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8350.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 1c97e28da6ad..1a5a612d4234 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1664,6 +1664,7 @@ power-domains = <&gcc UFS_PHY_GDSC>; iommus = <&apps_smmu 0xe0 0x0>; + dma-coherent; clock-names = "core_clk", -- cgit From 8ba961d4339c5db0e69ff6627606fe1f34c838e5 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 7 Mar 2023 21:02:01 +0530 Subject: arm64: dts: qcom: sm8450: Mark UFS controller as cache coherent The UFS controller on SM8450 supports cache coherency, hence add the "dma-coherent" property to mark it as such. Fixes: 07fa917a335e ("arm64: dts: qcom: sm8450: add ufs nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230307153201.180626-2-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index e77d188508fd..b285b1530c10 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -4003,6 +4003,7 @@ power-domains = <&gcc UFS_PHY_GDSC>; iommus = <&apps_smmu 0xe0 0x0>; + dma-coherent; interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; -- cgit From b891251b40d4dc4cfd28341f62f6784c02ad3a78 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Tue, 7 Mar 2023 18:23:40 -0500 Subject: arm64: dts: qcom: sa8540p-ride: correct name of remoteproc_nsp0 firmware The cdsp.mbn firmware that's referenced in sa8540p-ride.dts is actually named cdsp0.mbn in the deliverables from Qualcomm. Let's go ahead and correct the name to match what's in Qualcomm's deliverable. Signed-off-by: Brian Masney Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230307232340.2370476-1-bmasney@redhat.com --- arch/arm64/boot/dts/qcom/sa8540p-ride.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts index 3ccb5ffdb3ca..24fa449d48a6 100644 --- a/arch/arm64/boot/dts/qcom/sa8540p-ride.dts +++ b/arch/arm64/boot/dts/qcom/sa8540p-ride.dts @@ -241,7 +241,7 @@ }; &remoteproc_nsp0 { - firmware-name = "qcom/sa8540p/cdsp.mbn"; + firmware-name = "qcom/sa8540p/cdsp0.mbn"; status = "okay"; }; -- cgit From ee1d5100c37e7a95af506c7addf018f652545ce6 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 8 Mar 2023 11:16:30 +0530 Subject: arm64: dts: qcom: sm8550: Mark UFS controller as cache coherent The UFS controller on SM8550 supports cache coherency, hence add the "dma-coherent" property to mark it as such. Fixes: 35cf1aaab169 ("arm64: dts: qcom: sm8550: Add UFS host controller and phy nodes") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230308054630.7202-1-manivannan.sadhasivam@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 24aa724c12ea..5d0888398b3c 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -1905,6 +1905,7 @@ required-opps = <&rpmhpd_opp_nom>; iommus = <&apps_smmu 0x60 0x0>; + dma-coherent; interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; -- cgit From f3d0fbad6765da25de7ecf6481af9b6ddb0b3793 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 9 Mar 2023 12:12:09 +0100 Subject: firmware: qcom: scm: fix bogus irq error at probe A recent commit added support for an optional interrupt which is only available on some platforms. Stop spamming the logs with bogus error messages on platforms that do not use this new optional resource: qcom_scm firmware:scm: error -ENXIO: IRQ index 0 not found Fixes: 6bf325992236 ("firmware: qcom: scm: Add wait-queue handling logic") Cc: Guru Das Srinagesh Cc: Sibi Sankar Signed-off-by: Johan Hovold Tested-by: Steev Klimaszewski # Thinkpad X13s Acked-by: Guru Das Srinagesh Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230309111209.31606-1-johan+linaro@kernel.org --- drivers/firmware/qcom_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index 468d4d5ab550..b1e11f85b805 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -1479,7 +1479,7 @@ static int qcom_scm_probe(struct platform_device *pdev) init_completion(&__scm->waitq_comp); - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq < 0) { if (irq != -ENXIO) return irq; -- cgit From 3268a4d9b0b85a4382e93bdf7be5400a73db74c5 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Dec 2022 11:23:16 +0800 Subject: power: supply: rk817: Fix unsigned comparison with less than zero The tmp is defined as u32 type, which results in invalid processing of tmp<0 in function rk817_read_or_set_full_charge_on_boot(). Therefore, drop the comparison. drivers/power/supply/rk817_charger.c:828 rk817_read_or_set_full_charge_on_boot() warn: unsigned 'tmp' is never less than zero. drivers/power/supply/rk817_charger.c:788 rk817_read_or_set_full_charge_on_boot() warn: unsigned 'tmp' is never less than zero. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3444 Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Tested-by: Chris Morgan Signed-off-by: Sebastian Reichel --- drivers/power/supply/rk817_charger.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 4f9c1c417916..36f807b5ec44 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -785,8 +785,6 @@ rk817_read_or_set_full_charge_on_boot(struct rk817_charger *charger, regmap_bulk_read(rk808->regmap, RK817_GAS_GAUGE_Q_PRES_H3, bulk_reg, 4); tmp = get_unaligned_be32(bulk_reg); - if (tmp < 0) - tmp = 0; boot_charge_mah = ADC_TO_CHARGE_UAH(tmp, charger->res_div) / 1000; /* @@ -825,8 +823,6 @@ rk817_read_or_set_full_charge_on_boot(struct rk817_charger *charger, regmap_bulk_read(rk808->regmap, RK817_GAS_GAUGE_Q_PRES_H3, bulk_reg, 4); tmp = get_unaligned_be32(bulk_reg); - if (tmp < 0) - tmp = 0; boot_charge_mah = ADC_TO_CHARGE_UAH(tmp, charger->res_div) / 1000; regmap_bulk_read(rk808->regmap, RK817_GAS_GAUGE_OCV_VOL_H, bulk_reg, 2); -- cgit From 14c76b2e75bca4d96e2b85a0c12aa43e84fe3f74 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 12 Dec 2022 13:38:57 -0800 Subject: power: supply: cros_usbpd: reclassify "default case!" as debug This doesn't need to be printed every second as an error: ... <3>[17438.628385] cros-usbpd-charger cros-usbpd-charger.3.auto: Port 1: default case! <3>[17439.634176] cros-usbpd-charger cros-usbpd-charger.3.auto: Port 1: default case! <3>[17440.640298] cros-usbpd-charger cros-usbpd-charger.3.auto: Port 1: default case! ... Reduce priority from ERROR to DEBUG. Signed-off-by: Grant Grundler Reviewed-by: Guenter Roeck Signed-off-by: Sebastian Reichel --- drivers/power/supply/cros_usbpd-charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/cros_usbpd-charger.c b/drivers/power/supply/cros_usbpd-charger.c index cadb6a0c2cc7..b6c96376776a 100644 --- a/drivers/power/supply/cros_usbpd-charger.c +++ b/drivers/power/supply/cros_usbpd-charger.c @@ -276,7 +276,7 @@ static int cros_usbpd_charger_get_power_info(struct port_data *port) port->psy_current_max = 0; break; default: - dev_err(dev, "Port %d: default case!\n", port->port_number); + dev_dbg(dev, "Port %d: default case!\n", port->port_number); port->psy_usb_type = POWER_SUPPLY_USB_TYPE_SDP; } -- cgit From bf6c880d5d1448489ebf92e2d13d5713ff644930 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Tue, 6 Dec 2022 12:17:23 +0300 Subject: power: supply: axp288_fuel_gauge: Added check for negative values Variable 'pirq', which may receive negative value in platform_get_irq(). Used as an index in a function regmap_irq_get_virq(). Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Denis Arefev Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/axp288_fuel_gauge.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index 8e6f8a655079..05f413178462 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -724,6 +724,8 @@ static int axp288_fuel_gauge_probe(struct platform_device *pdev) for (i = 0; i < AXP288_FG_INTR_NUM; i++) { pirq = platform_get_irq(pdev, i); + if (pirq < 0) + continue; ret = regmap_irq_get_virq(axp20x->regmap_irqc, pirq); if (ret < 0) return dev_err_probe(dev, ret, "getting vIRQ %d\n", pirq); -- cgit From e921050022f1f12d5029d1487a7dfc46cde15523 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Sun, 26 Feb 2023 18:01:36 +0300 Subject: Revert "riscv: mm: notify remote harts about mmu cache updates" This reverts the remaining bits of commit 4bd1d80efb5a ("riscv: mm: notify remote harts harts about mmu cache updates"). According to bug reports, suggested approach to fix stale TLB entries is not sufficient. It needs to be replaced by a more robust solution. Fixes: 4bd1d80efb5a ("riscv: mm: notify remote harts about mmu cache updates") Reported-by: Zong Li Reported-by: Lad Prabhakar Signed-off-by: Sergey Matyukevich Cc: stable@vger.kernel.org Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20230226150137.1919750-2-geomatsi@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 2 -- arch/riscv/include/asm/tlbflush.h | 18 ------------------ arch/riscv/mm/context.c | 10 ---------- arch/riscv/mm/tlbflush.c | 28 +++++++++++++++++----------- 4 files changed, 17 insertions(+), 41 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 5ff1f19fd45c..0099dc116168 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,8 +19,6 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; - /* A local tlb flush is needed before user execution can resume. */ - cpumask_t tlb_stale_mask; #endif } mm_context_t; diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 907b9efd39a8..801019381dea 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,24 +22,6 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } - -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); -} - #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 80ce9caba8d2..7acbfbd14557 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -196,16 +196,6 @@ switch_mm_fast: if (need_flush_tlb) local_flush_tlb_all(); -#ifdef CONFIG_SMP - else { - cpumask_t *mask = &mm->context.tlb_stale_mask; - - if (cpumask_test_cpu(cpu, mask)) { - cpumask_clear_cpu(cpu, mask); - local_flush_tlb_all_asid(cntx & asid_mask); - } - } -#endif } static void set_mm_noasid(struct mm_struct *mm) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index ce7dfc81bb3f..37ed760d007c 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -5,7 +5,23 @@ #include #include #include -#include + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} void flush_tlb_all(void) { @@ -15,7 +31,6 @@ void flush_tlb_all(void) static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { - struct cpumask *pmask = &mm->context.tlb_stale_mask; struct cpumask *cmask = mm_cpumask(mm); unsigned int cpuid; bool broadcast; @@ -29,15 +44,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, if (static_branch_unlikely(&use_asid_allocator)) { unsigned long asid = atomic_long_read(&mm->context.id); - /* - * TLB will be immediately flushed on harts concurrently - * executing this MM context. TLB flush on other harts - * is deferred until this MM context migrates there. - */ - cpumask_setall(pmask); - cpumask_clear_cpu(cpuid, pmask); - cpumask_andnot(pmask, pmask, cmask); - if (broadcast) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); } else if (size <= stride) { -- cgit From 82dd33fde0268cc622d3d1ac64971f3f61634142 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 26 Feb 2023 18:01:37 +0300 Subject: riscv: asid: Fixup stale TLB entry cause application crash After use_asid_allocator is enabled, the userspace application will crash by stale TLB entries. Because only using cpumask_clear_cpu without local_flush_tlb_all couldn't guarantee CPU's TLB entries were fresh. Then set_mm_asid would cause the user space application to get a stale value by stale TLB entry, but set_mm_noasid is okay. Here is the symptom of the bug: unhandled signal 11 code 0x1 (coredump) 0x0000003fd6d22524 <+4>: auipc s0,0x70 0x0000003fd6d22528 <+8>: ld s0,-148(s0) # 0x3fd6d92490 => 0x0000003fd6d2252c <+12>: ld a5,0(s0) (gdb) i r s0 s0 0x8082ed1cc3198b21 0x8082ed1cc3198b21 (gdb) x /2x 0x3fd6d92490 0x3fd6d92490: 0xd80ac8a8 0x0000003f The core dump file shows that register s0 is wrong, but the value in memory is correct. Because 'ld s0, -148(s0)' used a stale mapping entry in TLB and got a wrong result from an incorrect physical address. When the task ran on CPU0, which loaded/speculative-loaded the value of address(0x3fd6d92490), then the first version of the mapping entry was PTWed into CPU0's TLB. When the task switched from CPU0 to CPU1 (No local_tlb_flush_all here by asid), it happened to write a value on the address (0x3fd6d92490). It caused do_page_fault -> wp_page_copy -> ptep_clear_flush -> ptep_get_and_clear & flush_tlb_page. The flush_tlb_page used mm_cpumask(mm) to determine which CPUs need TLB flush, but CPU0 had cleared the CPU0's mm_cpumask in the previous switch_mm. So we only flushed the CPU1 TLB and set the second version mapping of the PTE. When the task switched from CPU1 to CPU0 again, CPU0 still used a stale TLB mapping entry which contained a wrong target physical address. It raised a bug when the task happened to read that value. CPU0 CPU1 - switch 'task' in - read addr (Fill stale mapping entry into TLB) - switch 'task' out (no tlb_flush) - switch 'task' in (no tlb_flush) - write addr cause pagefault do_page_fault() (change to new addr mapping) wp_page_copy() ptep_clear_flush() ptep_get_and_clear() & flush_tlb_page() write new value into addr - switch 'task' out (no tlb_flush) - switch 'task' in (no tlb_flush) - read addr again (Use stale mapping entry in TLB) get wrong value from old phyical addr, BUG! The solution is to keep all CPUs' footmarks of cpumask(mm) in switch_mm, which could guarantee to invalidate all stale TLB entries during TLB flush. Fixes: 65d4b9c53017 ("RISC-V: Implement ASID allocator") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Tested-by: Lad Prabhakar Tested-by: Zong Li Tested-by: Sergey Matyukevich Cc: Anup Patel Cc: Palmer Dabbelt Cc: stable@vger.kernel.org Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230226150137.1919750-3-geomatsi@gmail.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/context.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 7acbfbd14557..0f784e3d307b 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -205,12 +205,24 @@ static void set_mm_noasid(struct mm_struct *mm) local_flush_tlb_all(); } -static inline void set_mm(struct mm_struct *mm, unsigned int cpu) +static inline void set_mm(struct mm_struct *prev, + struct mm_struct *next, unsigned int cpu) { - if (static_branch_unlikely(&use_asid_allocator)) - set_mm_asid(mm, cpu); - else - set_mm_noasid(mm); + /* + * The mm_cpumask indicates which harts' TLBs contain the virtual + * address mapping of the mm. Compared to noasid, using asid + * can't guarantee that stale TLB entries are invalidated because + * the asid mechanism wouldn't flush TLB for every switch_mm for + * performance. So when using asid, keep all CPUs footmarks in + * cpumask() until mm reset. + */ + cpumask_set_cpu(cpu, mm_cpumask(next)); + if (static_branch_unlikely(&use_asid_allocator)) { + set_mm_asid(next, cpu); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + set_mm_noasid(next); + } } static int __init asids_init(void) @@ -264,7 +276,8 @@ static int __init asids_init(void) } early_initcall(asids_init); #else -static inline void set_mm(struct mm_struct *mm, unsigned int cpu) +static inline void set_mm(struct mm_struct *prev, + struct mm_struct *next, unsigned int cpu) { /* Nothing to do here when there is no MMU */ } @@ -317,10 +330,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpu = smp_processor_id(); - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - cpumask_set_cpu(cpu, mm_cpumask(next)); - - set_mm(next, cpu); + set_mm(prev, next, cpu); flush_icache_deferred(next, cpu); } -- cgit From 47c29d69212911f50bdcdd0564b5999a559010d4 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 10 Mar 2023 01:47:28 +0800 Subject: power: supply: bq24190: Fix use after free bug in bq24190_remove due to race condition In bq24190_probe, &bdi->input_current_limit_work is bound with bq24190_input_current_limit_work. When external power changed, it will call bq24190_charger_external_power_changed to start the work. If we remove the module which will call bq24190_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: CPU0 CPUc1 |bq24190_input_current_limit_work bq24190_remove | power_supply_unregister | device_unregister | power_supply_dev_release| kfree(psy) | | | power_supply_get_property_from_supplier | //use Fix it by finishing the work before cleanup in the bq24190_remove Fixes: 97774672573a ("power_supply: Initialize changed_work before calling device_add") Signed-off-by: Zheng Wang Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq24190_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c index be34b9848450..de67b985f0a9 100644 --- a/drivers/power/supply/bq24190_charger.c +++ b/drivers/power/supply/bq24190_charger.c @@ -1906,6 +1906,7 @@ static void bq24190_remove(struct i2c_client *client) struct bq24190_dev_info *bdi = i2c_get_clientdata(client); int error; + cancel_delayed_work_sync(&bdi->input_current_limit_work); error = pm_runtime_resume_and_get(bdi->dev); if (error < 0) dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", error); -- cgit From 5cf9d015be160e2d90d29ae74ef1364390e8fce8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 8 Mar 2023 13:47:11 -0700 Subject: clk: Avoid invalid function names in CLK_OF_DECLARE() After commit c28cd1f3433c ("clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro"), drivers/clk/mvebu/kirkwood.c fails to build: drivers/clk/mvebu/kirkwood.c:358:1: error: expected identifier or '(' CLK_OF_DECLARE(98dx1135_clk, "marvell,mv98dx1135-core-clock", ^ include/linux/clk-provider.h:1367:21: note: expanded from macro 'CLK_OF_DECLARE' static void __init name##_of_clk_init_declare(struct device_node *np) \ ^ :124:1: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE' OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) ^ :125:3: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE' OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) ^ :125:3: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE' OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) ^ :125:3: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ C function names must start with either an alphabetic letter or an underscore. To avoid generating invalid function names from clock names, add two underscores to the beginning of the identifier. Fixes: c28cd1f3433c ("clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro") Suggested-by: Saravana Kannan Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230308-clk_of_declare-fix-v1-1-317b741e2532@kernel.org Reviewed-by: Saravana Kannan Reported-by: Naresh Kamboju Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c9f5276006a0..6f3175f0678a 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1364,12 +1364,12 @@ struct clk_hw_onecell_data { }; #define CLK_OF_DECLARE(name, compat, fn) \ - static void __init name##_of_clk_init_declare(struct device_node *np) \ + static void __init __##name##_of_clk_init_declare(struct device_node *np) \ { \ fn(np); \ fwnode_dev_initialized(of_fwnode_handle(np), true); \ } \ - OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) + OF_DECLARE_1(clk, name, compat, __##name##_of_clk_init_declare) /* * Use this macro when you have a driver that requires two initialization -- cgit From 4b1a2c2a8e0ddcb89c5f6c5003bd9b53142f69e3 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Wed, 28 Sep 2022 11:13:50 -0700 Subject: scsi: core: Add BLIST_NO_VPD_SIZE for some VDASD Some storage, such as AIX VDASD (virtual storage) and IBM 2076 (front end), fail as a result of commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full page"). That commit changed getting SCSI VPD pages so that we now read just enough of the page to get the actual page size, then read the whole page in a second read. The problem is that the above mentioned hardware returns zero for the page size, because of a firmware error. In such cases, until the firmware is fixed, this new blacklist flag says to revert to the original method of reading the VPD pages, i.e. try to read a whole buffer's worth on the first try. [mkp: reworked somewhat] Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") Reported-by: Martin Wilck Suggested-by: Hannes Reinecke Signed-off-by: Lee Duncan Link: https://lore.kernel.org/r/20220928181350.9948-1-leeman.duncan@gmail.com Tested-by: Srikar Dronamraju Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 3 +++ drivers/scsi/scsi_devinfo.c | 3 ++- drivers/scsi/scsi_scan.c | 3 +++ include/scsi/scsi_device.h | 2 ++ include/scsi/scsi_devinfo.h | 6 +++--- 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 7d2210a006f0..5cce1ba70fc6 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -326,6 +326,9 @@ static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); int result; + if (sdev->no_vpd_size) + return SCSI_DEFAULT_VPD_LEN; + /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index c7080454aea9..bc9d280417f6 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -134,7 +134,7 @@ static struct { {"3PARdata", "VV", NULL, BLIST_REPORTLUN2}, {"ADAPTEC", "AACRAID", NULL, BLIST_FORCELUN}, {"ADAPTEC", "Adaptec 5400S", NULL, BLIST_FORCELUN}, - {"AIX", "VDASD", NULL, BLIST_TRY_VPD_PAGES}, + {"AIX", "VDASD", NULL, BLIST_TRY_VPD_PAGES | BLIST_NO_VPD_SIZE}, {"AFT PRO", "-IX CF", "0.0>", BLIST_FORCELUN}, {"BELKIN", "USB 2 HS-CF", "1.95", BLIST_FORCELUN | BLIST_INQUIRY_36}, {"BROWNIE", "1200U3P", NULL, BLIST_NOREPORTLUN}, @@ -188,6 +188,7 @@ static struct { {"HPE", "OPEN-", "*", BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES}, {"IBM", "AuSaV1S2", NULL, BLIST_FORCELUN}, {"IBM", "ProFibre 4000R", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, + {"IBM", "2076", NULL, BLIST_NO_VPD_SIZE}, {"IBM", "2105", NULL, BLIST_RETRY_HWERROR}, {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN}, {"IOMEGA", "ZIP", NULL, BLIST_NOTQ | BLIST_NOLUN}, diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 4e842d79de31..d217be323cc6 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1057,6 +1057,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, else if (*bflags & BLIST_SKIP_VPD_PAGES) sdev->skip_vpd_pages = 1; + if (*bflags & BLIST_NO_VPD_SIZE) + sdev->no_vpd_size = 1; + transport_configure_device(&sdev->sdev_gendev); if (sdev->host->hostt->slave_configure) { diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index de310f21406c..f10a008e5bfa 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -145,6 +145,7 @@ struct scsi_device { const char * model; /* ... after scan; point to static string */ const char * rev; /* ... "nullnullnullnull" before scan */ +#define SCSI_DEFAULT_VPD_LEN 255 /* default SCSI VPD page size (max) */ struct scsi_vpd __rcu *vpd_pg0; struct scsi_vpd __rcu *vpd_pg83; struct scsi_vpd __rcu *vpd_pg80; @@ -215,6 +216,7 @@ struct scsi_device { * creation time */ unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ unsigned silence_suspend:1; /* Do not print runtime PM related messages */ + unsigned no_vpd_size:1; /* No VPD size reported in header */ unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 5d14adae21c7..6b548dc2c496 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -32,7 +32,8 @@ #define BLIST_IGN_MEDIA_CHANGE ((__force blist_flags_t)(1ULL << 11)) /* do not do automatic start on add */ #define BLIST_NOSTARTONADD ((__force blist_flags_t)(1ULL << 12)) -#define __BLIST_UNUSED_13 ((__force blist_flags_t)(1ULL << 13)) +/* do not ask for VPD page size first on some broken targets */ +#define BLIST_NO_VPD_SIZE ((__force blist_flags_t)(1ULL << 13)) #define __BLIST_UNUSED_14 ((__force blist_flags_t)(1ULL << 14)) #define __BLIST_UNUSED_15 ((__force blist_flags_t)(1ULL << 15)) #define __BLIST_UNUSED_16 ((__force blist_flags_t)(1ULL << 16)) @@ -74,8 +75,7 @@ #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \ (__force blist_flags_t) \ ((__force __u64)__BLIST_LAST_USED - 1ULL))) -#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_13 | \ - __BLIST_UNUSED_14 | \ +#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_14 | \ __BLIST_UNUSED_15 | \ __BLIST_UNUSED_16 | \ __BLIST_UNUSED_24 | \ -- cgit From be03df3d4bfe7e8866d4aa43d62e648ffe884f5f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Mar 2023 13:44:28 -0800 Subject: scsi: core: Fix a procfs host directory removal regression scsi_proc_hostdir_rm() decreases a reference counter and hence must only be called once per host that is removed. This change does not require a scsi_add_host_with_dma() change since scsi_add_host_with_dma() will return 0 (success) if scsi_proc_host_add() is called. Fixes: fc663711b944 ("scsi: core: Remove the /proc/scsi/${proc_name} directory earlier") Cc: John Garry Reported-by: John Garry Link: https://lore.kernel.org/all/ed6b8027-a9d9-1b45-be8e-df4e8c6c4605@oracle.com/ Reported-by: syzbot+645a4616b87a2f10e398@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-scsi/000000000000890fab05f65342b6@google.com/ Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230307214428.3703498-1-bvanassche@acm.org Tested-by: John Garry Tested-by: Shin'ichiro Kawasaki Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index f7f62e56afca..9b6fbbe15d92 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -341,9 +341,6 @@ static void scsi_host_dev_release(struct device *dev) struct Scsi_Host *shost = dev_to_shost(dev); struct device *parent = dev->parent; - /* In case scsi_remove_host() has not been called. */ - scsi_proc_hostdir_rm(shost->hostt); - /* Wait for functions invoked through call_rcu(&scmd->rcu, ...) */ rcu_barrier(); -- cgit From c6001025d53ab56d7159cf313313c6b5bd250380 Mon Sep 17 00:00:00 2001 From: Asutosh Das Date: Wed, 8 Mar 2023 15:13:23 -0800 Subject: scsi: ufs: mcq: Use active_reqs to check busy in clock scaling Multi Circular Queue doesn't use outstanding_reqs. However, the UFS clock scaling functions use outstanding_reqs to determine if there are requests pending. When MCQ is enabled, this check always returns false. Hence use active_reqs to check if there are pending requests. Fixes: eacb139b77ff ("scsi: ufs: core: mcq: Enable multi-circular queue") Signed-off-by: Asutosh Das Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/a24e0d646aac70eae0fc5e05fac0c58bb7e6e680.1678317160.git.quic_asutoshd@quicinc.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 05eac965ee27..37e178a9ac47 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1500,7 +1500,7 @@ start_window: scaling->window_start_t = curr_t; scaling->tot_busy_t = 0; - if (hba->outstanding_reqs) { + if (scaling->active_reqs) { scaling->busy_start_t = curr_t; scaling->is_busy_started = true; } else { @@ -2118,7 +2118,7 @@ static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba) spin_lock_irqsave(hba->host->host_lock, flags); hba->clk_scaling.active_reqs--; - if (!hba->outstanding_reqs && scaling->is_busy_started) { + if (!scaling->active_reqs && scaling->is_busy_started) { scaling->tot_busy_t += ktime_to_us(ktime_sub(ktime_get(), scaling->busy_start_t)); scaling->busy_start_t = 0; -- cgit From e0213434fe3e4a0d118923dc98d31e7ff1cd9e45 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 1 Mar 2023 20:00:52 -0500 Subject: tracing: Do not let histogram values have some modifiers Histogram values can not be strings, stacktraces, graphs, symbols, syscalls, or grouped in buckets or log. Give an error if a value is set to do so. Note, the histogram code was not prepared to handle these modifiers for histograms and caused a bug. Mark Rutland reported: # echo 'p:copy_to_user __arch_copy_to_user n=$arg2' >> /sys/kernel/tracing/kprobe_events # echo 'hist:keys=n:vals=hitcount.buckets=8:sort=hitcount' > /sys/kernel/tracing/events/kprobes/copy_to_user/trigger # cat /sys/kernel/tracing/events/kprobes/copy_to_user/hist [ 143.694628] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 143.695190] Mem abort info: [ 143.695362] ESR = 0x0000000096000004 [ 143.695604] EC = 0x25: DABT (current EL), IL = 32 bits [ 143.695889] SET = 0, FnV = 0 [ 143.696077] EA = 0, S1PTW = 0 [ 143.696302] FSC = 0x04: level 0 translation fault [ 143.702381] Data abort info: [ 143.702614] ISV = 0, ISS = 0x00000004 [ 143.702832] CM = 0, WnR = 0 [ 143.703087] user pgtable: 4k pages, 48-bit VAs, pgdp=00000000448f9000 [ 143.703407] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000 [ 143.704137] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 143.704714] Modules linked in: [ 143.705273] CPU: 0 PID: 133 Comm: cat Not tainted 6.2.0-00003-g6fc512c10a7c #3 [ 143.706138] Hardware name: linux,dummy-virt (DT) [ 143.706723] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 143.707120] pc : hist_field_name.part.0+0x14/0x140 [ 143.707504] lr : hist_field_name.part.0+0x104/0x140 [ 143.707774] sp : ffff800008333a30 [ 143.707952] x29: ffff800008333a30 x28: 0000000000000001 x27: 0000000000400cc0 [ 143.708429] x26: ffffd7a653b20260 x25: 0000000000000000 x24: ffff10d303ee5800 [ 143.708776] x23: ffffd7a6539b27b0 x22: ffff10d303fb8c00 x21: 0000000000000001 [ 143.709127] x20: ffff10d303ec2000 x19: 0000000000000000 x18: 0000000000000000 [ 143.709478] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 143.709824] x14: 0000000000000000 x13: 203a6f666e692072 x12: 6567676972742023 [ 143.710179] x11: 0a230a6d6172676f x10: 000000000000002c x9 : ffffd7a6521e018c [ 143.710584] x8 : 000000000000002c x7 : 7f7f7f7f7f7f7f7f x6 : 000000000000002c [ 143.710915] x5 : ffff10d303b0103e x4 : ffffd7a653b20261 x3 : 000000000000003d [ 143.711239] x2 : 0000000000020001 x1 : 0000000000000001 x0 : 0000000000000000 [ 143.711746] Call trace: [ 143.712115] hist_field_name.part.0+0x14/0x140 [ 143.712642] hist_field_name.part.0+0x104/0x140 [ 143.712925] hist_field_print+0x28/0x140 [ 143.713125] event_hist_trigger_print+0x174/0x4d0 [ 143.713348] hist_show+0xf8/0x980 [ 143.713521] seq_read_iter+0x1bc/0x4b0 [ 143.713711] seq_read+0x8c/0xc4 [ 143.713876] vfs_read+0xc8/0x2a4 [ 143.714043] ksys_read+0x70/0xfc [ 143.714218] __arm64_sys_read+0x24/0x30 [ 143.714400] invoke_syscall+0x50/0x120 [ 143.714587] el0_svc_common.constprop.0+0x4c/0x100 [ 143.714807] do_el0_svc+0x44/0xd0 [ 143.714970] el0_svc+0x2c/0x84 [ 143.715134] el0t_64_sync_handler+0xbc/0x140 [ 143.715334] el0t_64_sync+0x190/0x194 [ 143.715742] Code: a9bd7bfd 910003fd a90153f3 aa0003f3 (f9400000) [ 143.716510] ---[ end trace 0000000000000000 ]--- Segmentation fault Link: https://lkml.kernel.org/r/20230302020810.559462599@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Andrew Morton Fixes: c6afad49d127f ("tracing: Add hist trigger 'sym' and 'sym-offset' modifiers") Reported-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 89877a18f933..6e8ab726a7b5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4235,6 +4235,15 @@ static int __create_val_field(struct hist_trigger_data *hist_data, goto out; } + /* Some types cannot be a value */ + if (hist_field->flags & (HIST_FIELD_FL_GRAPH | HIST_FIELD_FL_PERCENT | + HIST_FIELD_FL_BUCKET | HIST_FIELD_FL_LOG2 | + HIST_FIELD_FL_SYM | HIST_FIELD_FL_SYM_OFFSET | + HIST_FIELD_FL_SYSCALL | HIST_FIELD_FL_STACKTRACE)) { + hist_err(file->tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(field_str)); + ret = -EINVAL; + } + hist_data->fields[val_idx] = hist_field; ++hist_data->n_vals; -- cgit From 9f116f76fa8c04c81aef33ad870dbf9a158e5b70 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 1 Mar 2023 20:00:53 -0500 Subject: tracing: Check field value in hist_field_name() The function hist_field_name() cannot handle being passed a NULL field parameter. It should never be NULL, but due to a previous bug, NULL was passed to the function and the kernel crashed due to a NULL dereference. Mark Rutland reported this to me on IRC. The bug was fixed, but to prevent future bugs from crashing the kernel, check the field and add a WARN_ON() if it is NULL. Link: https://lkml.kernel.org/r/20230302020810.762384440@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Andrew Morton Reported-by: Mark Rutland Fixes: c6afad49d127f ("tracing: Add hist trigger 'sym' and 'sym-offset' modifiers") Tested-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6e8ab726a7b5..486cca3c2b75 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1331,6 +1331,9 @@ static const char *hist_field_name(struct hist_field *field, { const char *field_name = ""; + if (WARN_ON_ONCE(!field)) + return field_name; + if (level > 1) return field_name; -- cgit From ee92fa443358f4fc0017c1d0d325c27b37802504 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Thu, 9 Mar 2023 16:02:30 +0800 Subject: ftrace: Fix invalid address access in lookup_rec() when index is 0 KASAN reported follow problem: BUG: KASAN: use-after-free in lookup_rec Read of size 8 at addr ffff000199270ff0 by task modprobe CPU: 2 Comm: modprobe Call trace: kasan_report __asan_load8 lookup_rec ftrace_location arch_check_ftrace_location check_kprobe_address_safe register_kprobe When checking pg->records[pg->index - 1].ip in lookup_rec(), it can get a pg which is newly added to ftrace_pages_start in ftrace_process_locs(). Before the first pg->index++, index is 0 and accessing pg->records[-1].ip will cause this problem. Don't check the ip when pg->index is 0. Link: https://lore.kernel.org/linux-trace-kernel/20230309080230.36064-1-chenzhongjin@huawei.com Cc: stable@vger.kernel.org Fixes: 9644302e3315 ("ftrace: Speed up search by skipping pages by address") Suggested-by: Steven Rostedt (Google) Signed-off-by: Chen Zhongjin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 750aa3f08b25..a47f7d93e32d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1537,7 +1537,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) key.flags = end; /* overload flags, as it is unsigned long */ for (pg = ftrace_pages_start; pg; pg = pg->next) { - if (end < pg->records[0].ip || + if (pg->index == 0 || + end < pg->records[0].ip || start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) continue; rec = bsearch(&key, pg->records, pg->index, -- cgit From aa69f814920d85a2d4cfd5c294757c3d59d2fba6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 31 Jan 2023 10:36:30 +0100 Subject: ftrace,kcfi: Define ftrace_stub_graph conditionally When CONFIG_FUNCTION_GRAPH_TRACER is disabled, __kcfi_typeid_ftrace_stub_graph is missing, causing a link failure: ld.lld: error: undefined symbol: __kcfi_typeid_ftrace_stub_graph referenced by arch/x86/kernel/ftrace_64.o:(__cfi_ftrace_stub_graph) in archive vmlinux.a Mark the reference to it as conditional on the same symbol, as is done on arm64. Link: https://lore.kernel.org/linux-trace-kernel/20230131093643.3850272-1-arnd@kernel.org Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Josh Poimboeuf Fixes: 883bbbffa5a4 ("ftrace,kcfi: Separate ftrace_stub() and ftrace_stub_graph()") See-also: 2598ac6ec493 ("arm64: ftrace: Define ftrace_stub_graph only with FUNCTION_GRAPH_TRACER") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (Google) --- arch/x86/kernel/ftrace_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 1265ad519249..fb4f1e01b64a 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -136,10 +136,12 @@ SYM_TYPED_FUNC_START(ftrace_stub) RET SYM_FUNC_END(ftrace_stub) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_TYPED_FUNC_START(ftrace_stub_graph) CALL_DEPTH_ACCOUNT RET SYM_FUNC_END(ftrace_stub_graph) +#endif #ifdef CONFIG_DYNAMIC_FTRACE -- cgit From bced3f7db95ff2e6ca29dc4d1c9751ab5e736a09 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Mar 2023 11:07:45 -0800 Subject: tcp: tcp_make_synack() can be called from process context tcp_rtx_synack() now could be called in process context as explained in 0a375c822497 ("tcp: tcp_rtx_synack() can be called from process context"). tcp_rtx_synack() might call tcp_make_synack(), which will touch per-CPU variables with preemption enabled. This causes the following BUG: BUG: using __this_cpu_add() in preemptible [00000000] code: ThriftIO1/5464 caller is tcp_make_synack+0x841/0xac0 Call Trace: dump_stack_lvl+0x10d/0x1a0 check_preemption_disabled+0x104/0x110 tcp_make_synack+0x841/0xac0 tcp_v6_send_synack+0x5c/0x450 tcp_rtx_synack+0xeb/0x1f0 inet_rtx_syn_ack+0x34/0x60 tcp_check_req+0x3af/0x9e0 tcp_rcv_state_process+0x59b/0x2030 tcp_v6_do_rcv+0x5f5/0x700 release_sock+0x3a/0xf0 tcp_sendmsg+0x33/0x40 ____sys_sendmsg+0x2f2/0x490 __sys_sendmsg+0x184/0x230 do_syscall_64+0x3d/0x90 Avoid calling __TCP_INC_STATS() with will touch per-cpu variables. Use TCP_INC_STATS() which is safe to be called from context switch. Fixes: 8336886f786f ("tcp: TCP Fast Open Server - support TFO listeners") Signed-off-by: Breno Leitao Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230308190745.780221-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 71d01cf3c13e..ba839e441450 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3605,7 +3605,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rsk_rcv_wnd, 65535U)); tcp_options_write(th, NULL, &opts); th->doff = (tcp_header_size >> 2); - __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ -- cgit From aed8efddd39b3434c96718d39009285c52b1cafc Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Tue, 14 Feb 2023 16:09:24 +0800 Subject: vp_vdpa: fix the crash in hot unplug with vp_vdpa While unplugging the vp_vdpa device, it triggers a kernel panic The root cause is: vdpa_mgmtdev_unregister() will accesses modern devices which will cause a use after free. So need to change the sequence in vp_vdpa_remove [ 195.003359] BUG: unable to handle page fault for address: ff4e8beb80199014 [ 195.004012] #PF: supervisor read access in kernel mode [ 195.004486] #PF: error_code(0x0000) - not-present page [ 195.004960] PGD 100000067 P4D 1001b6067 PUD 1001b7067 PMD 1001b8067 PTE 0 [ 195.005578] Oops: 0000 1 PREEMPT SMP PTI [ 195.005968] CPU: 13 PID: 164 Comm: kworker/u56:10 Kdump: loaded Not tainted 5.14.0-252.el9.x86_64 #1 [ 195.006792] Hardware name: Red Hat KVM/RHEL, BIOS edk2-20221207gitfff6d81270b5-2.el9 unknown [ 195.007556] Workqueue: kacpi_hotplug acpi_hotplug_work_fn [ 195.008059] RIP: 0010:ioread8+0x31/0x80 [ 195.008418] Code: 77 28 48 81 ff 00 00 01 00 76 0b 89 fa ec 0f b6 c0 c3 cc cc cc cc 8b 15 ad 72 93 01 b8 ff 00 00 00 85 d2 75 0f c3 cc cc cc cc <8a> 07 0f b6 c0 c3 cc cc cc cc 83 ea 01 48 83 ec 08 48 89 fe 48 c7 [ 195.010104] RSP: 0018:ff4e8beb8067bab8 EFLAGS: 00010292 [ 195.010584] RAX: ffffffffc05834a0 RBX: ffffffffc05843c0 RCX: ff4e8beb8067bae0 [ 195.011233] RDX: ff1bcbd580f88000 RSI: 0000000000000246 RDI: ff4e8beb80199014 [ 195.011881] RBP: ff1bcbd587e39000 R08: ffffffff916fa2d0 R09: ff4e8beb8067ba68 [ 195.012527] R10: 000000000000001c R11: 0000000000000000 R12: ff1bcbd5a3de9120 [ 195.013179] R13: ffffffffc062d000 R14: 0000000000000080 R15: ff1bcbe402bc7805 [ 195.013826] FS: 0000000000000000(0000) GS:ff1bcbe402740000(0000) knlGS:0000000000000000 [ 195.014564] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 195.015093] CR2: ff4e8beb80199014 CR3: 0000000107dea002 CR4: 0000000000771ee0 [ 195.015741] PKRU: 55555554 [ 195.016001] Call Trace: [ 195.016233] [ 195.016434] vp_modern_get_status+0x12/0x20 [ 195.016823] vp_vdpa_reset+0x1b/0x50 [vp_vdpa] [ 195.017238] virtio_vdpa_reset+0x3c/0x48 [virtio_vdpa] [ 195.017709] remove_vq_common+0x1f/0x3a0 [virtio_net] [ 195.018178] virtnet_remove+0x5d/0x70 [virtio_net] [ 195.018618] virtio_dev_remove+0x3d/0x90 [ 195.018986] device_release_driver_internal+0x1aa/0x230 [ 195.019466] bus_remove_device+0xd8/0x150 [ 195.019841] device_del+0x18b/0x3f0 [ 195.020167] ? kernfs_find_ns+0x35/0xd0 [ 195.020526] device_unregister+0x13/0x60 [ 195.020894] unregister_virtio_device+0x11/0x20 [ 195.021311] device_release_driver_internal+0x1aa/0x230 [ 195.021790] bus_remove_device+0xd8/0x150 [ 195.022162] device_del+0x18b/0x3f0 [ 195.022487] device_unregister+0x13/0x60 [ 195.022852] ? vdpa_dev_remove+0x30/0x30 [vdpa] [ 195.023270] vp_vdpa_dev_del+0x12/0x20 [vp_vdpa] [ 195.023694] vdpa_match_remove+0x2b/0x40 [vdpa] [ 195.024115] bus_for_each_dev+0x78/0xc0 [ 195.024471] vdpa_mgmtdev_unregister+0x65/0x80 [vdpa] [ 195.024937] vp_vdpa_remove+0x23/0x40 [vp_vdpa] [ 195.025353] pci_device_remove+0x36/0xa0 [ 195.025719] device_release_driver_internal+0x1aa/0x230 [ 195.026201] pci_stop_bus_device+0x6c/0x90 [ 195.026580] pci_stop_and_remove_bus_device+0xe/0x20 [ 195.027039] disable_slot+0x49/0x90 [ 195.027366] acpiphp_disable_and_eject_slot+0x15/0x90 [ 195.027832] hotplug_event+0xea/0x210 [ 195.028171] ? hotplug_event+0x210/0x210 [ 195.028535] acpiphp_hotplug_notify+0x22/0x80 [ 195.028942] ? hotplug_event+0x210/0x210 [ 195.029303] acpi_device_hotplug+0x8a/0x1d0 [ 195.029690] acpi_hotplug_work_fn+0x1a/0x30 [ 195.030077] process_one_work+0x1e8/0x3c0 [ 195.030451] worker_thread+0x50/0x3b0 [ 195.030791] ? rescuer_thread+0x3a0/0x3a0 [ 195.031165] kthread+0xd9/0x100 [ 195.031459] ? kthread_complete_and_exit+0x20/0x20 [ 195.031899] ret_from_fork+0x22/0x30 [ 195.032233] Fixes: ffbda8e9df10 ("vdpa/vp_vdpa : add vdpa tool support in vp_vdpa") Tested-by: Lei Yang Cc: stable@vger.kernel.org Signed-off-by: Cindy Lu Message-Id: <20230214080924.131462-1-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 8fe267ca3e76..281287fae89f 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -645,8 +645,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev) struct virtio_pci_modern_device *mdev = NULL; mdev = vp_vdpa_mgtdev->mdev; - vp_modern_remove(mdev); vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev); + vp_modern_remove(mdev); kfree(vp_vdpa_mgtdev->mgtdev.id_table); kfree(mdev); kfree(vp_vdpa_mgtdev); -- cgit From 09e65ee9059d76b89cb713795748805efd3f50c6 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Tue, 14 Feb 2023 17:30:40 -0800 Subject: vdpa/mlx5: should not activate virtq object when suspended Otherwise the virtqueue object to instate could point to invalid address that was unmapped from the MTT: mlx5_core 0000:41:04.2: mlx5_cmd_out_err:782:(pid 8321): CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status bad parameter(0x3), syndrome (0x5fa1c), err(-22) Fixes: cae15c2ed8e6 ("vdpa/mlx5: Implement susupend virtqueue callback") Cc: Eli Cohen Signed-off-by: Si-Wei Liu Reviewed-by: Eli Cohen Message-Id: <1676424640-11673-1-git-send-email-si-wei.liu@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/net/mlx5_vnet.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 058fbe28107e..25fc4120b618 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -96,6 +96,7 @@ struct mlx5_vdpa_dev { struct mlx5_control_vq cvq; struct workqueue_struct *wq; unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; + bool suspended; }; int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 3a0e721aef05..520646ae7fa0 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2438,7 +2438,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, if (err) goto err_mr; - if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) goto err_mr; restore_channels_info(ndev); @@ -2606,6 +2606,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; + ndev->mvdev.suspended = false; ndev->cur_num_vqs = 0; ndev->mvdev.cvq.received_desc = 0; ndev->mvdev.cvq.completed_desc = 0; @@ -2852,6 +2853,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) struct mlx5_vdpa_virtqueue *mvq; int i; + mlx5_vdpa_info(mvdev, "suspending device\n"); + down_write(&ndev->reslock); ndev->nb_registered = false; mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); @@ -2861,6 +2864,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) suspend_vq(ndev, mvq); } mlx5_vdpa_cvq_suspend(mvdev); + mvdev->suspended = true; up_write(&ndev->reslock); return 0; } -- cgit From 06be62083c5308c76a891ca975d66d832e2afc07 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Fri, 10 Mar 2023 09:48:45 +0000 Subject: nvmem: core: return -ENOENT if nvmem cell is not found Prior to commit 5d8e6e6c10a3 ("nvmem: core: add an index parameter to the cell") of_nvmem_cell_get() would return -ENOENT if the cell wasn't found. Particularly, if of_property_match_string() returned -EINVAL, that return code was passed as the index to of_parse_phandle(), which then detected it as invalid and returned NULL. That led to an return code of -ENOENT. With the new code, the negative index will lead to an -EINVAL of of_parse_phandle_with_optional_args() which pass straight to the caller and break those who expect an -ENOENT. Fix it by always returning -ENOENT. Fixes: 5d8e6e6c10a3 ("nvmem: core: add an index parameter to the cell") Reported-by: Alexander Stein Link: https://lore.kernel.org/r/2143916.GUh0CODmnK@steina-w/ Signed-off-by: Michael Walle Tested-by: Alexander Stein Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230310094845.139400-1-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 174ef3574e07..22024b830788 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -1231,7 +1231,7 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id) "#nvmem-cell-cells", index, &cell_spec); if (ret) - return ERR_PTR(ret); + return ERR_PTR(-ENOENT); if (cell_spec.args_count > 1) return ERR_PTR(-EINVAL); -- cgit From f624bb6fad23df3270580b4fcef415c6e7bf7705 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:29 +0200 Subject: wifi: nl80211: fix NULL-ptr deref in offchan check If, e.g. in AP mode, the link was already created by userspace but not activated yet, it has a chandef but the chandef isn't valid and has no channel. Check for this and ignore this link. Fixes: 7b0a0e3c3a88 ("wifi: cfg80211: do some rework towards MLO link APIs") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.71bd4803fbb9.Iee39c0f6c2d3a59a8227674dc55d52e38b1090cf@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 112b4bb009c8..51f6582eff7b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8901,7 +8901,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, struct cfg80211_chan_def *chandef; chandef = wdev_chandef(wdev, link_id); - if (!chandef) + if (!chandef || !chandef->chan) continue; /* -- cgit From b27f07c50a73e34eefb6b1030b235192b7ded850 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Feb 2023 13:36:57 +0100 Subject: wifi: nl80211: fix puncturing bitmap policy This was meant to be a u32, and while applying the patch I tried to use policy validation for it. However, not only did I copy/paste it to u8 instead of u32, but also used the policy range erroneously. Fix both of these issues. Fixes: d7c1a9a0ed18 ("wifi: nl80211: validate and configure puncturing bitmap") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 51f6582eff7b..6869781283e2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -462,6 +462,11 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = { [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, }; +static struct netlink_range_validation nl80211_punct_bitmap_range = { + .min = 0, + .max = 0xffff, +}; + static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -805,7 +810,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN), [NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT }, - [NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_RANGE(NLA_U8, 0, 0xffff), + [NL80211_ATTR_PUNCT_BITMAP] = + NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range), }; /* policy for the key attributes */ -- cgit From ce04abc3fcc62cd5640af981ebfd7c4dc3bded28 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Feb 2023 10:52:19 +0100 Subject: wifi: mac80211: check basic rates validity When userspace sets basic rates, it might send us some rates list that's empty or consists of invalid values only. We're currently ignoring invalid values and then may end up with a rates bitmap that's empty, which later results in a warning. Reject the call if there were no valid rates. Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8eb342300868..d3d861911ed6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2611,6 +2611,17 @@ static int ieee80211_change_bss(struct wiphy *wiphy, if (!sband) return -EINVAL; + if (params->basic_rates) { + if (!ieee80211_parse_bitrates(link->conf->chandef.width, + wiphy->bands[sband->band], + params->basic_rates, + params->basic_rates_len, + &link->conf->basic_rates)) + return -EINVAL; + changed |= BSS_CHANGED_BASIC_RATES; + ieee80211_check_rate_mask(link); + } + if (params->use_cts_prot >= 0) { link->conf->use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; @@ -2632,16 +2643,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy, changed |= BSS_CHANGED_ERP_SLOT; } - if (params->basic_rates) { - ieee80211_parse_bitrates(link->conf->chandef.width, - wiphy->bands[sband->band], - params->basic_rates, - params->basic_rates_len, - &link->conf->basic_rates); - changed |= BSS_CHANGED_BASIC_RATES; - ieee80211_check_rate_mask(link); - } - if (params->ap_isolate >= 0) { if (params->ap_isolate) sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS; -- cgit From 96c069508377547f913e7265a80fffe9355de592 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Mar 2023 12:09:33 +0200 Subject: wifi: cfg80211: fix MLO connection ownership When disconnecting from an MLO connection we need the AP MLD address, not an arbitrary BSSID. Fix the code to do that. Fixes: 9ecff10e82a5 ("wifi: nl80211: refactor BSS lookup in nl80211_associate()") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230301115906.4c1b3b18980e.I008f070c7f3b8e8bde9278101ef9e40706a82902@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6869781283e2..4f63059efd81 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10799,8 +10799,7 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, - struct nlattr **attrs, - const u8 **bssid_out) + struct nlattr **attrs) { struct ieee80211_channel *chan; struct cfg80211_bss *bss; @@ -10827,7 +10826,6 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device if (!bss) return ERR_PTR(-ENOENT); - *bssid_out = bssid; return bss; } @@ -10837,7 +10835,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct cfg80211_assoc_request req = {}; struct nlattr **attrs = NULL; - const u8 *bssid, *ssid; + const u8 *ap_addr, *ssid; unsigned int link_id; int err, ssid_len; @@ -10974,6 +10972,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) return -EINVAL; req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); + ap_addr = req.ap_mld_addr; attrs = kzalloc(attrsize, GFP_KERNEL); if (!attrs) @@ -10999,8 +10998,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto free; } req.links[link_id].bss = - nl80211_assoc_bss(rdev, ssid, ssid_len, attrs, - &bssid); + nl80211_assoc_bss(rdev, ssid, ssid_len, attrs); if (IS_ERR(req.links[link_id].bss)) { err = PTR_ERR(req.links[link_id].bss); req.links[link_id].bss = NULL; @@ -11051,10 +11049,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (req.link_id >= 0) return -EINVAL; - req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs, - &bssid); + req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs); if (IS_ERR(req.bss)) return PTR_ERR(req.bss); + ap_addr = req.bss->bssid; } err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); @@ -11067,7 +11065,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; memcpy(dev->ieee80211_ptr->disconnect_bssid, - bssid, ETH_ALEN); + ap_addr, ETH_ALEN); } wdev_unlock(dev->ieee80211_ptr); -- cgit From c7d9e628b8ff4d52a365a441bdacb3209ee83c81 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Mar 2023 12:15:24 +0100 Subject: efi/libstub: zboot: Mark zboot EFI application as NX compatible Now that the zboot loader will invoke the EFI memory attributes protocol to remap the decompressed code and rodata as read-only/executable, we can set the PE/COFF header flag that indicates to the firmware that the application does not rely on writable memory being executable at the same time. Cc: # v6.2+ Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/zboot-header.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/zboot-header.S b/drivers/firmware/efi/libstub/zboot-header.S index ec4525d40e0c..445cb646eaaa 100644 --- a/drivers/firmware/efi/libstub/zboot-header.S +++ b/drivers/firmware/efi/libstub/zboot-header.S @@ -63,7 +63,7 @@ __efistub_efi_zboot_header: .long .Lefi_header_end - .Ldoshdr .long 0 .short IMAGE_SUBSYSTEM_EFI_APPLICATION - .short 0 + .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT #ifdef CONFIG_64BIT .quad 0, 0, 0, 0 #else -- cgit From 3c60f67b4bd1bc01fa9194e9dc925ac6cb56156c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Mar 2023 12:55:41 +0100 Subject: efi/libstub: arm64: Remap relocated image with strict permissions After relocating the executable image, use the EFI memory attributes protocol to remap the code and data regions with the appropriate permissions. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64-stub.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index d4a6b12a8741..b996553cdb4c 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -139,6 +139,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, *image_addr = *reserve_addr; memcpy((void *)*image_addr, _text, kernel_size); caches_clean_inval_pou(*image_addr, *image_addr + kernel_codesize); + efi_remap_image(*image_addr, *reserve_size, kernel_codesize); return EFI_SUCCESS; } -- cgit From 3c66bb1918c262dd52fb4221a8d372619c5da70a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 10 Mar 2023 13:30:05 +0100 Subject: arm64: efi: Set NX compat flag in PE/COFF header The PE/COFF header has a NX compat flag which informs the firmware that the application does not rely on memory regions being mapped with both executable and writable permissions at the same time. This is typically used by the firmware to decide whether it can set the NX attribute on all allocations it returns, but going forward, it may be used to enforce a policy that only permits applications with the NX flag set to be loaded to begin wiht in some configurations, e.g., when Secure Boot is in effect. Even though the arm64 version of the EFI stub may relocate the kernel before executing it, it always did so after disabling the MMU, and so we were always in line with what the NX compat flag conveys, we just never bothered to set it. So let's set the flag now. Cc: Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi-header.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index 28d8a5dca5f1..d731b4655df8 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -66,7 +66,7 @@ .long .Lefi_header_end - .L_head // SizeOfHeaders .long 0 // CheckSum .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics + .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics .quad 0 // SizeOfStackReserve .quad 0 // SizeOfStackCommit .quad 0 // SizeOfHeapReserve -- cgit From fe9ae05cfbe587dda724fcf537c00bc2f287da62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Mar 2023 11:50:12 +0100 Subject: fbdev: Fix incorrect page mapping clearance at fb_deferred_io_release() The recent fix for the deferred I/O by the commit 3efc61d95259 ("fbdev: Fix invalid page access after closing deferred I/O devices") caused a regression when the same fb device is opened/closed while it's being used. It resulted in a frozen screen even if something is redrawn there after the close. The breakage is because the patch was made under a wrong assumption of a single open; in the current code, fb_deferred_io_release() cleans up the page mapping of the pageref list and it calls cancel_delayed_work_sync() unconditionally, where both are no correct behavior for multiple opens. This patch adds a refcount for the opens of the device, and applies the cleanup only when all files get closed. As both fb_deferred_io_open() and _close() are called always in the fb_info lock (mutex), it's safe to use the normal int for the refcounting. Also, a useless BUG_ON() is dropped. Fixes: 3efc61d95259 ("fbdev: Fix invalid page access after closing deferred I/O devices") Cc: Signed-off-by: Takashi Iwai Reviewed-by: Patrik Jakobsson Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230308105012.1845-1-tiwai@suse.de --- drivers/video/fbdev/core/fb_defio.c | 17 +++++++++++++---- include/linux/fb.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index 583cbcf09446..a3cf1f764f29 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -309,17 +309,18 @@ void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file) { + struct fb_deferred_io *fbdefio = info->fbdefio; + file->f_mapping->a_ops = &fb_deferred_io_aops; + fbdefio->open_count++; } EXPORT_SYMBOL_GPL(fb_deferred_io_open); -void fb_deferred_io_release(struct fb_info *info) +static void fb_deferred_io_lastclose(struct fb_info *info) { - struct fb_deferred_io *fbdefio = info->fbdefio; struct page *page; int i; - BUG_ON(!fbdefio); cancel_delayed_work_sync(&info->deferred_work); /* clear out the mapping that we setup */ @@ -328,13 +329,21 @@ void fb_deferred_io_release(struct fb_info *info) page->mapping = NULL; } } + +void fb_deferred_io_release(struct fb_info *info) +{ + struct fb_deferred_io *fbdefio = info->fbdefio; + + if (!--fbdefio->open_count) + fb_deferred_io_lastclose(info); +} EXPORT_SYMBOL_GPL(fb_deferred_io_release); void fb_deferred_io_cleanup(struct fb_info *info) { struct fb_deferred_io *fbdefio = info->fbdefio; - fb_deferred_io_release(info); + fb_deferred_io_lastclose(info); kvfree(info->pagerefs); mutex_destroy(&fbdefio->lock); diff --git a/include/linux/fb.h b/include/linux/fb.h index 73eb1f85ea8e..05e40fcc7696 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -212,6 +212,7 @@ struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; bool sort_pagereflist; /* sort pagelist by offset */ + int open_count; /* number of opened files; protected by fb_info lock */ struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ /* callback */ -- cgit From e57d06527738798039b8e91af762fbd33881b34d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Mar 2023 09:45:09 -0500 Subject: NFS & NFSD: Update GSS dependencies Geert reports that: > On v6.2, "make ARCH=m68k defconfig" gives you > CONFIG_RPCSEC_GSS_KRB5=m > On v6.3, it became builtin, due to dropping the dependencies on > the individual crypto modules. > > $ grep -E "CRYPTO_(MD5|DES|CBC|CTS|ECB|HMAC|SHA1|AES)" .config > CONFIG_CRYPTO_AES=y > CONFIG_CRYPTO_AES_TI=m > CONFIG_CRYPTO_DES=m > CONFIG_CRYPTO_CBC=m > CONFIG_CRYPTO_CTS=m > CONFIG_CRYPTO_ECB=m > CONFIG_CRYPTO_HMAC=m > CONFIG_CRYPTO_MD5=m > CONFIG_CRYPTO_SHA1=m This behavior is triggered by the "default y" in the definition of RPCSEC_GSS. The "default y" was added in 2010 by commit df486a25900f ("NFS: Fix the selection of security flavours in Kconfig"). However, svc_gss_principal was removed in 2012 by commit 03a4e1f6ddf2 ("nfsd4: move principal name into svc_cred"), so the 2010 fix is no longer necessary. We can safely change the NFS_V4 and NFSD_V4 dependencies back to RPCSEC_GSS_KRB5 to get the nicer v6.2 behavior back. Selecting KRB5 symbolically represents the true requirement here: that all spec-compliant NFSv4 implementations must have Kerberos available to use. Reported-by: Geert Uytterhoeven Fixes: dfe9a123451a ("SUNRPC: Enable rpcsec_gss_krb5.ko to be built without CRYPTO_DES") Signed-off-by: Chuck Lever --- fs/nfs/Kconfig | 2 +- fs/nfsd/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 14a72224b657..450d6c3bc05e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -75,7 +75,7 @@ config NFS_V3_ACL config NFS_V4 tristate "NFS client support for NFS version 4" depends on NFS_FS - select SUNRPC_GSS + select RPCSEC_GSS_KRB5 select KEYS help This option enables support for version 4 of the NFS protocol diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 7c441f2bd444..43b88eaf0673 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -73,7 +73,7 @@ config NFSD_V4 bool "NFS server support for NFS version 4" depends on NFSD && PROC_FS select FS_POSIX_ACL - select SUNRPC_GSS + select RPCSEC_GSS_KRB5 select CRYPTO select CRYPTO_MD5 select CRYPTO_SHA256 -- cgit From d6f7ff9dd387861fa30cbc6375d15b586da17d33 Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Wed, 8 Mar 2023 16:48:36 -0800 Subject: libbpf: Revert poisoning of strlcpy This reverts commit 6d0c4b11e743("libbpf: Poison strlcpy()"). It added the pragma poison directive to libbpf_internal.h to protect against accidental usage of strlcpy but ended up breaking the build for toolchains based on libcs which provide the strlcpy() declaration from string.h (e.g. uClibc-ng). The include order which causes the issue is: string.h, from Iibbpf_common.h:12, from libbpf.h:20, from libbpf_internal.h:26, from strset.c:9: Fixes: 6d0c4b11e743 ("libbpf: Poison strlcpy()") Signed-off-by: Jesus Sanchez-Palencia Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309004836.2808610-1-jesussanp@google.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index fbaf68335394..e4d05662a96c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -20,8 +20,8 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -/* prevent accidental re-addition of reallocarray()/strlcpy() */ -#pragma GCC poison reallocarray strlcpy +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray #include "libbpf.h" #include "btf.h" -- cgit From 624c60f326c6e5a80b008e8a5c7feffe8c27dc72 Mon Sep 17 00:00:00 2001 From: Guillaume Tucker Date: Tue, 9 Aug 2022 16:22:31 +0200 Subject: selftests: fix LLVM build for i386 and x86_64 Add missing cases for the i386 and x86_64 architectures when determining the LLVM target for building kselftest. Fixes: 795285ef2425 ("selftests: Fix clang cross compilation") Signed-off-by: Guillaume Tucker Reviewed-by: Nathan Chancellor Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index f7900e75d230..05400462c779 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -10,12 +10,14 @@ endif CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl +CLANG_TARGET_FLAGS_i386 := i386-linux-gnu CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu +CLANG_TARGET_FLAGS_x86_64 := x86_64-linux-gnu CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) ifeq ($(CROSS_COMPILE),) -- cgit From 32513d40d908b267508d37994753d9bd1600914b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2023 12:41:18 -0800 Subject: selftests/bpf: Fix progs/find_vma_fail1.c build error. The commit 11e456cae91e ("selftests/bpf: Fix compilation errors: Assign a value to a constant") fixed the issue cleanly in bpf-next. This is an alternative fix in bpf tree to avoid merge conflict between bpf and bpf-next. Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/find_vma_fail1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c index b3b326b8e2d1..6dab9cffda13 100644 --- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c +++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include "vmlinux.h" #include +#define vm_flags vm_start char _license[] SEC("license") = "GPL"; -- cgit From e8c8361cfdbf450f760e8a2bdbd4222d1947366b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2023 12:47:51 -0800 Subject: selftests/bpf: Fix progs/test_deny_namespace.c issues. The following build error can be seen: progs/test_deny_namespace.c:22:19: error: call to undeclared function 'BIT_LL'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); The struct kernel_cap_struct no longer exists in the kernel as well. Adjust bpf prog to fix both issues. Fixes: f122a08b197d ("capability: just use a 'u64' instead of a 'u32[2]' array") Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_deny_namespace.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c index 591104e79812..e96b901a733c 100644 --- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c @@ -5,12 +5,10 @@ #include #include -struct kernel_cap_struct { - __u64 val; -} __attribute__((preserve_access_index)); +typedef struct { unsigned long long val; } kernel_cap_t; struct cred { - struct kernel_cap_struct cap_effective; + kernel_cap_t cap_effective; } __attribute__((preserve_access_index)); char _license[] SEC("license") = "GPL"; @@ -18,8 +16,8 @@ char _license[] SEC("license") = "GPL"; SEC("lsm.s/userns_create") int BPF_PROG(test_userns_create, const struct cred *cred, int ret) { - struct kernel_cap_struct caps = cred->cap_effective; - __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); + kernel_cap_t caps = cred->cap_effective; + __u64 cap_mask = 1ULL << CAP_SYS_ADMIN; if (ret) return 0; -- cgit From 484b7059796e3bc1cb527caa61dfc60da649b4f6 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 9 Mar 2023 19:50:50 +0300 Subject: nfc: pn533: initialize struct pn533_out_arg properly struct pn533_out_arg used as a temporary context for out_urb is not initialized properly. Its uninitialized 'phy' field can be dereferenced in error cases inside pn533_out_complete() callback function. It causes the following failure: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.2.0-rc3-next-20230110-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:pn533_out_complete.cold+0x15/0x44 drivers/nfc/pn533/usb.c:441 Call Trace: __usb_hcd_giveback_urb+0x2b6/0x5c0 drivers/usb/core/hcd.c:1671 usb_hcd_giveback_urb+0x384/0x430 drivers/usb/core/hcd.c:1754 dummy_timer+0x1203/0x32d0 drivers/usb/gadget/udc/dummy_hcd.c:1988 call_timer_fn+0x1da/0x800 kernel/time/timer.c:1700 expire_timers+0x234/0x330 kernel/time/timer.c:1751 __run_timers kernel/time/timer.c:2022 [inline] __run_timers kernel/time/timer.c:1995 [inline] run_timer_softirq+0x326/0x910 kernel/time/timer.c:2035 __do_softirq+0x1fb/0xaf6 kernel/softirq.c:571 invoke_softirq kernel/softirq.c:445 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650 irq_exit_rcu+0x9/0x20 kernel/softirq.c:662 sysvec_apic_timer_interrupt+0x97/0xc0 arch/x86/kernel/apic/apic.c:1107 Initialize the field with the pn533_usb_phy currently used. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 9dab880d675b ("nfc: pn533: Wait for out_urb's completion in pn533_usb_send_frame()") Reported-by: syzbot+1e608ba4217c96d1952f@syzkaller.appspotmail.com Signed-off-by: Fedor Pchelkin Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230309165050.207390-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- drivers/nfc/pn533/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index ed9c5e2cf3ad..a187f0e0b0f7 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -175,6 +175,7 @@ static int pn533_usb_send_frame(struct pn533 *dev, print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); + arg.phy = phy; init_completion(&arg.done); cntx = phy->out_urb->context; phy->out_urb->context = &arg; -- cgit From 25074a44ac4e5dae5b4a25dcb9bbfcbd00f15ae2 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 8 Mar 2023 10:49:33 +0800 Subject: virtio_net: reorder some funcs The purpose of this is to facilitate the subsequent addition of new functions without introducing a separate declaration. Signed-off-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 92 ++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fb5e68ed3ec2..8b31a04052f2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -545,6 +545,52 @@ ok: return skb; } +static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) +{ + unsigned int len; + unsigned int packets = 0; + unsigned int bytes = 0; + void *ptr; + + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(!is_xdp_frame(ptr))) { + struct sk_buff *skb = ptr; + + pr_debug("Sent skb %p\n", skb); + + bytes += skb->len; + napi_consume_skb(skb, in_napi); + } else { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += xdp_get_frame_len(frame); + xdp_return_frame(frame); + } + packets++; + } + + /* Avoid overhead when no packets have been processed + * happens when called speculatively from start_xmit. + */ + if (!packets) + return; + + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; + u64_stats_update_end(&sq->stats.syncp); +} + +static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) +{ + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; +} + static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct send_queue *sq, struct xdp_frame *xdpf) @@ -1714,52 +1760,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget, return stats.packets; } -static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) -{ - unsigned int len; - unsigned int packets = 0; - unsigned int bytes = 0; - void *ptr; - - while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { - if (likely(!is_xdp_frame(ptr))) { - struct sk_buff *skb = ptr; - - pr_debug("Sent skb %p\n", skb); - - bytes += skb->len; - napi_consume_skb(skb, in_napi); - } else { - struct xdp_frame *frame = ptr_to_xdp(ptr); - - bytes += xdp_get_frame_len(frame); - xdp_return_frame(frame); - } - packets++; - } - - /* Avoid overhead when no packets have been processed - * happens when called speculatively from start_xmit. - */ - if (!packets) - return; - - u64_stats_update_begin(&sq->stats.syncp); - sq->stats.bytes += bytes; - sq->stats.packets += packets; - u64_stats_update_end(&sq->stats.syncp); -} - -static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) -{ - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; -} - static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; -- cgit From b8ef4809bc7faa22e63de921ef56de21ed191af0 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 8 Mar 2023 10:49:34 +0800 Subject: virtio_net: separate the logic of checking whether sq is full Separate the logic of checking whether sq is full. The subsequent patch will reuse this func. Signed-off-by: Xuan Zhuo Reviewed-by: Alexander Duyck Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 60 +++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8b31a04052f2..46bbddaadb0d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -591,6 +591,41 @@ static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) return false; } +static void check_sq_full_and_disable(struct virtnet_info *vi, + struct net_device *dev, + struct send_queue *sq) +{ + bool use_napi = sq->napi.weight; + int qnum; + + qnum = sq - vi->sq; + + /* If running out of space, stop queue to avoid getting packets that we + * are then unable to transmit. + * An alternative would be to force queuing layer to requeue the skb by + * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be + * returned in a normal path of operation: it means that driver is not + * maintaining the TX queue stop/start state properly, and causes + * the stack to do a non-trivial amount of useless work. + * Since most packets only take 1 or 2 ring slots, stopping the queue + * early means 16 slots are typically wasted. + */ + if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + netif_stop_subqueue(dev, qnum); + if (use_napi) { + if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) + virtqueue_napi_schedule(&sq->napi, sq->vq); + } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { + /* More just got used, free them then recheck. */ + free_old_xmit_skbs(sq, false); + if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + netif_start_subqueue(dev, qnum); + virtqueue_disable_cb(sq->vq); + } + } + } +} + static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct send_queue *sq, struct xdp_frame *xdpf) @@ -1989,30 +2024,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset_ct(skb); } - /* If running out of space, stop queue to avoid getting packets that we - * are then unable to transmit. - * An alternative would be to force queuing layer to requeue the skb by - * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be - * returned in a normal path of operation: it means that driver is not - * maintaining the TX queue stop/start state properly, and causes - * the stack to do a non-trivial amount of useless work. - * Since most packets only take 1 or 2 ring slots, stopping the queue - * early means 16 slots are typically wasted. - */ - if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { - netif_stop_subqueue(dev, qnum); - if (use_napi) { - if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) - virtqueue_napi_schedule(&sq->napi, sq->vq); - } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { - /* More just got used, free them then recheck. */ - free_old_xmit_skbs(sq, false); - if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { - netif_start_subqueue(dev, qnum); - virtqueue_disable_cb(sq->vq); - } - } - } + check_sq_full_and_disable(vi, dev, sq); if (kick || netif_xmit_stopped(txq)) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { -- cgit From cd1c604aa1d8c641f5edcb58b76352d4eba06ec1 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 8 Mar 2023 10:49:35 +0800 Subject: virtio_net: add checking sq is full inside xdp xmit If the queue of xdp xmit is not an independent queue, then when the xdp xmit used all the desc, the xmit from the __dev_queue_xmit() may encounter the following error. net ens4: Unexpected TXQ (0) queue failure: -28 This patch adds a check whether sq is full in xdp xmit. Fixes: 56434a01b12e ("virtio_net: add XDP_TX support") Reported-by: Yichun Zhang Signed-off-by: Xuan Zhuo Reviewed-by: Alexander Duyck Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 46bbddaadb0d..1a309cfb4976 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -767,6 +767,9 @@ static int virtnet_xdp_xmit(struct net_device *dev, } ret = nxmit; + if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq)) + check_sq_full_and_disable(vi, dev, sq); + if (flags & XDP_XMIT_FLUSH) { if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) kicks = 1; -- cgit From 71582371a5ee09272b4b4b0a07fa6eb78c9d2f90 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 9 Mar 2023 12:49:11 +0100 Subject: MAINTAINERS: make my email address consistent Use jiri@resnulli.us in all MAINTAINERS entries and fixup .mailmap so all other addresses point to that one. Signed-off-by: Jiri Pirko Link: https://lore.kernel.org/r/20230309114911.923460-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- .mailmap | 3 +++ MAINTAINERS | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index 66754ca6656d..438d79c27717 100644 --- a/.mailmap +++ b/.mailmap @@ -210,6 +210,9 @@ Jens Axboe Jens Osterkamp Jernej Skrabec Jessica Zhang +Jiri Pirko +Jiri Pirko +Jiri Pirko Jiri Slaby Jiri Slaby Jiri Slaby diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f305..d86c7807aa20 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5971,7 +5971,7 @@ F: include/linux/dm-*.h F: include/uapi/linux/dm-*.h DEVLINK -M: Jiri Pirko +M: Jiri Pirko L: netdev@vger.kernel.org S: Supported F: Documentation/networking/devlink @@ -15079,7 +15079,7 @@ F: Documentation/hwmon/nzxt-smart2.rst F: drivers/hwmon/nzxt-smart2.c OBJAGG -M: Jiri Pirko +M: Jiri Pirko L: netdev@vger.kernel.org S: Supported F: include/linux/objagg.h @@ -15853,7 +15853,7 @@ F: drivers/video/logo/logo_parisc* F: include/linux/hp_sdc.h PARMAN -M: Jiri Pirko +M: Jiri Pirko L: netdev@vger.kernel.org S: Supported F: include/linux/parman.h -- cgit From 8ba572052a4b8fe5b205854d27e54e3486049b71 Mon Sep 17 00:00:00 2001 From: "Radu Pirea (OSS)" Date: Thu, 9 Mar 2023 12:01:11 +0200 Subject: net: phy: nxp-c45-tja11xx: fix MII_BASIC_CONFIG_REV bit According to the TJA1103 user manual, the bit for the reversed role in MII or RMII modes is bit 4. Cc: # 5.15+ Fixes: b050f2f15e04 ("phy: nxp-c45: add driver for tja1103") Signed-off-by: Radu Pirea (OSS) Link: https://lore.kernel.org/r/20230309100111.1246214-1-radu-nicolae.pirea@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/nxp-c45-tja11xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 047c581457e3..5813b07242ce 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -79,7 +79,7 @@ #define SGMII_ABILITY BIT(0) #define VEND1_MII_BASIC_CONFIG 0xAFC6 -#define MII_BASIC_CONFIG_REV BIT(8) +#define MII_BASIC_CONFIG_REV BIT(4) #define MII_BASIC_CONFIG_SGMII 0x9 #define MII_BASIC_CONFIG_RGMII 0x7 #define MII_BASIC_CONFIG_RMII 0x5 -- cgit From 59a0b022aa249e3f5735d93de0849341722c4754 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 9 Mar 2023 10:03:36 +0800 Subject: ipvlan: Make skb->skb_iif track skb->dev for l3s mode For l3s mode, skb->dev is set to ipvlan interface in ipvlan_nf_input(): skb->dev = addr->master->dev but, skb->skb_iif remain unchanged, this will cause socket lookup failed if a target socket is bound to a interface, like the following example: ip link add ipvlan0 link eth0 type ipvlan mode l3s ip addr add dev ipvlan0 192.168.124.111/24 ip link set ipvlan0 up ping -c 1 -I ipvlan0 8.8.8.8 100% packet loss This is because there is no match sk in __raw_v4_lookup() as sk->sk_bound_dev_if != dif(skb->skb_iif). Fix this by make skb->skb_iif track skb->dev in ipvlan_nf_input(). Fixes: c675e06a98a4 ("ipvlan: decouple l3s mode dependencies from other modes") Signed-off-by: Jianguo Wu Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/29865b1f-6db7-c07a-de89-949d3721ea30@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_l3s.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c index 943d26cbf39f..71712ea25403 100644 --- a/drivers/net/ipvlan/ipvlan_l3s.c +++ b/drivers/net/ipvlan/ipvlan_l3s.c @@ -101,6 +101,7 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, goto out; skb->dev = addr->master->dev; + skb->skb_iif = skb->dev->ifindex; len = skb->len + ETH_HLEN; ipvlan_count_rx(addr->master, len, true, false); out: -- cgit From 7e4f8a0c495413a50413e8c9f1032ce1bc633bae Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 9 Mar 2023 10:45:09 -0800 Subject: i40e: Fix kernel crash during reboot when adapter is in recovery mode If the driver detects during probe that firmware is in recovery mode then i40e_init_recovery_mode() is called and the rest of probe function is skipped including pci_set_drvdata(). Subsequent i40e_shutdown() called during shutdown/reboot dereferences NULL pointer as pci_get_drvdata() returns NULL. To fix call pci_set_drvdata() also during entering to recovery mode. Reproducer: 1) Lets have i40e NIC with firmware in recovery mode 2) Run reboot Result: [ 139.084698] i40e: Intel(R) Ethernet Connection XL710 Network Driver [ 139.090959] i40e: Copyright (c) 2013 - 2019 Intel Corporation. [ 139.108438] i40e 0000:02:00.0: Firmware recovery mode detected. Limiting functionality. [ 139.116439] i40e 0000:02:00.0: Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode. [ 139.129499] i40e 0000:02:00.0: fw 8.3.64775 api 1.13 nvm 8.30 0x8000b78d 1.3106.0 [8086:1583] [15d9:084a] [ 139.215932] i40e 0000:02:00.0 enp2s0f0: renamed from eth0 [ 139.223292] i40e 0000:02:00.1: Firmware recovery mode detected. Limiting functionality. [ 139.231292] i40e 0000:02:00.1: Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode. [ 139.244406] i40e 0000:02:00.1: fw 8.3.64775 api 1.13 nvm 8.30 0x8000b78d 1.3106.0 [8086:1583] [15d9:084a] [ 139.329209] i40e 0000:02:00.1 enp2s0f1: renamed from eth0 ... [ 156.311376] BUG: kernel NULL pointer dereference, address: 00000000000006c2 [ 156.318330] #PF: supervisor write access in kernel mode [ 156.323546] #PF: error_code(0x0002) - not-present page [ 156.328679] PGD 0 P4D 0 [ 156.331210] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 156.335567] CPU: 26 PID: 15119 Comm: reboot Tainted: G E 6.2.0+ #1 [ 156.343126] Hardware name: Abacus electric, s.r.o. - servis@abacus.cz Super Server/H12SSW-iN, BIOS 2.4 04/13/2022 [ 156.353369] RIP: 0010:i40e_shutdown+0x15/0x130 [i40e] [ 156.358430] Code: c1 fc ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 fd 53 48 8b 9f 48 01 00 00 80 8b c2 06 00 00 04 f0 80 8b c0 06 00 00 08 48 8d bb 08 08 00 [ 156.377168] RSP: 0018:ffffb223c8447d90 EFLAGS: 00010282 [ 156.382384] RAX: ffffffffc073ee70 RBX: 0000000000000000 RCX: 0000000000000001 [ 156.389510] RDX: 0000000080000001 RSI: 0000000000000246 RDI: ffff95db49988000 [ 156.396634] RBP: ffff95db49988000 R08: ffffffffffffffff R09: ffffffff8bd17d40 [ 156.403759] R10: 0000000000000001 R11: ffffffff8a5e3d28 R12: ffff95db49988000 [ 156.410882] R13: ffffffff89a6fe17 R14: ffff95db49988150 R15: 0000000000000000 [ 156.418007] FS: 00007fe7c0cc3980(0000) GS:ffff95ea8ee80000(0000) knlGS:0000000000000000 [ 156.426083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 156.431819] CR2: 00000000000006c2 CR3: 00000003092fc005 CR4: 0000000000770ee0 [ 156.438944] PKRU: 55555554 [ 156.441647] Call Trace: [ 156.444096] [ 156.446199] pci_device_shutdown+0x38/0x60 [ 156.450297] device_shutdown+0x163/0x210 [ 156.454215] kernel_restart+0x12/0x70 [ 156.457872] __do_sys_reboot+0x1ab/0x230 [ 156.461789] ? vfs_writev+0xa6/0x1a0 [ 156.465362] ? __pfx_file_free_rcu+0x10/0x10 [ 156.469635] ? __call_rcu_common.constprop.85+0x109/0x5a0 [ 156.475034] do_syscall_64+0x3e/0x90 [ 156.478611] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 156.483658] RIP: 0033:0x7fe7bff37ab7 Fixes: 4ff0ee1af016 ("i40e: Introduce recovery mode support") Signed-off-by: Ivan Vecera Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230309184509.984639-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 467001db5070..228cd502bb48 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -15525,6 +15525,7 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) int err; int v_idx; + pci_set_drvdata(pf->pdev, pf); pci_save_state(pf->pdev); /* set up periodic task facility */ -- cgit From 8f76a4f80fba8096a611b6b60c40a0f4cab3ddfb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 13:25:25 +0100 Subject: tools: ynl: fix render-max for flags definition Properly manage render-max property for flags definition type introducing mask value and setting it to (last_element << 1) - 1 instead of adding max value set to last_element + 1 Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 1bcc5354d800..d47376f19de7 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1931,9 +1931,14 @@ def render_uapi(family, cw): if const.get('render-max', False): cw.nl() - max_name = c_upper(name_pfx + 'max') - cw.p('__' + max_name + ',') - cw.p(max_name + ' = (__' + max_name + ' - 1)') + if const['type'] == 'flags': + max_name = c_upper(name_pfx + 'mask') + max_val = f' = {enum.get_mask()},' + cw.p(max_name + max_val) + else: + max_name = c_upper(name_pfx + 'max') + cw.p('__' + max_name + ',') + cw.p(max_name + ' = (__' + max_name + ' - 1)') cw.block_end(line=';') cw.nl() elif const['type'] == 'const': -- cgit From bf51d27704c963ea52c0843096e23c9f404b13af Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 13:25:26 +0100 Subject: tools: ynl: fix get_mask utility routine Fix get_mask utility routine in order to take into account possible gaps in the elements list. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/nlspec.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index a34d088f6743..960a356e8225 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -138,10 +138,8 @@ class SpecEnumSet(SpecElement): def get_mask(self): mask = 0 - idx = self.yaml.get('value-start', 0) - for _ in self.entries.values(): - mask |= 1 << idx - idx += 1 + for e in self.entries.values(): + mask += e.user_value() return mask -- cgit From f85949f98206b3b11d92d695cea4efda6a81f00e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 13:25:27 +0100 Subject: xdp: add xdp_set_features_flag utility routine Introduce xdp_set_features_flag utility routine in order to update dynamically xdp_features according to the dynamic hw configuration via ethtool (e.g. changing number of hw rx/tx queues). Add xdp_clear_features_flag() in order to clear all xdp_feature flag. Reviewed-by: Shay Agroskin Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 1 + include/net/xdp.h | 11 +++++++++++ include/uapi/linux/netdev.h | 2 ++ net/core/xdp.c | 26 +++++++++++++++++++------- tools/include/uapi/linux/netdev.h | 2 ++ 5 files changed, 35 insertions(+), 7 deletions(-) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 24de747b5344..753e5914a8b7 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -9,6 +9,7 @@ definitions: - type: flags name: xdp-act + render-max: true entries: - name: basic diff --git a/include/net/xdp.h b/include/net/xdp.h index d517bfac937b..41c57b8b1671 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -428,12 +428,18 @@ MAX_XDP_METADATA_KFUNC, #ifdef CONFIG_NET u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); +void xdp_set_features_flag(struct net_device *dev, xdp_features_t val); void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg); void xdp_features_clear_redirect_target(struct net_device *dev); #else static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; } static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; } +static inline void +xdp_set_features_flag(struct net_device *dev, xdp_features_t val) +{ +} + static inline void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) { @@ -445,4 +451,9 @@ xdp_features_clear_redirect_target(struct net_device *dev) } #endif +static inline void xdp_clear_features_flag(struct net_device *dev) +{ + xdp_set_features_flag(dev, 0); +} + #endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 8c4e3e536c04..ed134fbdfd32 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -33,6 +33,8 @@ enum netdev_xdp_act { NETDEV_XDP_ACT_HW_OFFLOAD = 16, NETDEV_XDP_ACT_RX_SG = 32, NETDEV_XDP_ACT_NDO_XMIT_SG = 64, + + NETDEV_XDP_ACT_MASK = 127, }; enum { diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc553317..87e654b7d06c 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -774,20 +774,32 @@ static int __init xdp_metadata_init(void) } late_initcall(xdp_metadata_init); -void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +void xdp_set_features_flag(struct net_device *dev, xdp_features_t val) { - dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; - if (support_sg) - dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG; + val &= NETDEV_XDP_ACT_MASK; + if (dev->xdp_features == val) + return; + dev->xdp_features = val; call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); } +EXPORT_SYMBOL_GPL(xdp_set_features_flag); + +void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg) +{ + xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT); + + if (support_sg) + val |= NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_set_features_flag(dev, val); +} EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target); void xdp_features_clear_redirect_target(struct net_device *dev) { - dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT | - NETDEV_XDP_ACT_NDO_XMIT_SG); - call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); + xdp_features_t val = dev->xdp_features; + + val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG); + xdp_set_features_flag(dev, val); } EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 8c4e3e536c04..ed134fbdfd32 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -33,6 +33,8 @@ enum netdev_xdp_act { NETDEV_XDP_ACT_HW_OFFLOAD = 16, NETDEV_XDP_ACT_RX_SG = 32, NETDEV_XDP_ACT_NDO_XMIT_SG = 64, + + NETDEV_XDP_ACT_MASK = 127, }; enum { -- cgit From 3c249fe4de1608a9af56563606d4f6eb3a64a47f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 13:25:28 +0100 Subject: net: thunderx: take into account xdp_features setting tx/rx queues thunderx nic allows xdp just if enough hw queues are available for XDP. Take into account queues configuration setting xdp_features. Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c | 17 +++++++++++------ drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 +++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index e5c71f907852..d8d71bf97983 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -735,12 +735,17 @@ static int nicvf_set_channels(struct net_device *dev, if (channel->tx_count > nic->max_queues) return -EINVAL; - if (nic->xdp_prog && - ((channel->tx_count + channel->rx_count) > nic->max_queues)) { - netdev_err(nic->netdev, - "XDP mode, RXQs + TXQs > Max %d\n", - nic->max_queues); - return -EINVAL; + if (channel->tx_count + channel->rx_count > nic->max_queues) { + if (nic->xdp_prog) { + netdev_err(nic->netdev, + "XDP mode, RXQs + TXQs > Max %d\n", + nic->max_queues); + return -EINVAL; + } + + xdp_clear_features_flag(nic->netdev); + } else if (!pass1_silicon(nic->pdev)) { + xdp_set_features_flag(dev, NETDEV_XDP_ACT_BASIC); } if (if_up) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 8b25313c7f6b..eff350e0bc2a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2218,7 +2218,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + if (!pass1_silicon(nic->pdev) && + nic->rx_queues + nic->tx_queues <= nic->max_queues) + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; /* MTU range: 64 - 9200 */ netdev->min_mtu = NIC_HW_MIN_FRS; -- cgit From 7aa6dc351b92abbfead9ebe25ce7a7ac0384ea6d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 13:25:29 +0100 Subject: net: ena: take into account xdp_features setting tx/rx queues ena nic allows xdp just if enough hw queues are available for XDP. Take into account queues configuration setting xdp_features. Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Reviewed-by: Shay Agroskin Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 15 ++++++++++++--- drivers/net/ethernet/amazon/ena/ena_netdev.c | 6 ++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 8da79eedc057..1d4f2f4d10f2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -850,11 +850,20 @@ static int ena_set_channels(struct net_device *netdev, struct ena_adapter *adapter = netdev_priv(netdev); u32 count = channels->combined_count; /* The check for max value is already done in ethtool */ - if (count < ENA_MIN_NUM_IO_QUEUES || - (ena_xdp_present(adapter) && - !ena_xdp_legal_queue_count(adapter, count))) + if (count < ENA_MIN_NUM_IO_QUEUES) return -EINVAL; + if (!ena_xdp_legal_queue_count(adapter, count)) { + if (ena_xdp_present(adapter)) + return -EINVAL; + + xdp_clear_features_flag(netdev); + } else { + xdp_set_features_flag(netdev, + NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT); + } + return ena_update_queue_count(adapter, count); } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index d3999db7c6a2..cbfe7f977270 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -4105,8 +4105,6 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter, /* Set offload features */ ena_set_dev_offloads(feat, netdev); - netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; - adapter->max_mtu = feat->dev_attr.max_mtu; netdev->max_mtu = adapter->max_mtu; netdev->min_mtu = ENA_MIN_MTU; @@ -4393,6 +4391,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ena_config_debug_area(adapter); + if (ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT; + memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len); netif_carrier_off(netdev); -- cgit From fccca038f3003daa8f28a5e5d97efe50f04b8d9d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 13:25:30 +0100 Subject: veth: take into account device reconfiguration for xdp_features flag Take into account tx/rx queues reconfiguration setting device xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable ndo_xdp_xmit callback. Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1bb54de7124d..293dc3b2c84a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1257,6 +1257,26 @@ static int veth_enable_range_safe(struct net_device *dev, int start, int end) return 0; } +static void veth_set_xdp_features(struct net_device *dev) +{ + struct veth_priv *priv = netdev_priv(dev); + struct net_device *peer; + + peer = rcu_dereference(priv->peer); + if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) { + xdp_features_t val = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG; + + if (priv->_xdp_prog || veth_gro_requested(dev)) + val |= NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; + xdp_set_features_flag(dev, val); + } else { + xdp_clear_features_flag(dev); + } +} + static int veth_set_channels(struct net_device *dev, struct ethtool_channels *ch) { @@ -1323,6 +1343,12 @@ out: if (peer) netif_carrier_on(peer); } + + /* update XDP supported features */ + veth_set_xdp_features(dev); + if (peer) + veth_set_xdp_features(peer); + return err; revert: @@ -1489,7 +1515,10 @@ static int veth_set_features(struct net_device *dev, err = veth_napi_enable(dev); if (err) return err; + + xdp_features_set_redirect_target(dev, true); } else { + xdp_features_clear_redirect_target(dev); veth_napi_del(dev); } return 0; @@ -1570,10 +1599,15 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; peer->max_mtu = max_mtu; } + + xdp_features_set_redirect_target(dev, true); } if (old_prog) { if (!prog) { + if (!veth_gro_requested(dev)) + xdp_features_clear_redirect_target(dev); + if (dev->flags & IFF_UP) veth_disable_xdp(dev); @@ -1686,10 +1720,6 @@ static void veth_setup(struct net_device *dev) dev->hw_enc_features = VETH_FEATURES; dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; netif_set_tso_max_size(dev, GSO_MAX_SIZE); - - dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | - NETDEV_XDP_ACT_NDO_XMIT_SG; } /* @@ -1857,6 +1887,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, goto err_queues; veth_disable_gro(dev); + /* update XDP supported features */ + veth_set_xdp_features(dev); + veth_set_xdp_features(peer); + return 0; err_queues: -- cgit From 4d5ab0ad964df178beba031b89429a601893ff61 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 9 Mar 2023 13:25:31 +0100 Subject: net/mlx5e: take into account device reconfiguration for xdp_features flag Take into account LRO and GRO configuration setting device xdp_features flag. Consider channel rq_wq_type enabling rx scatter-gatter support in xdp_features flag and disable NETDEV_XDP_ACT_NDO_XMIT_SG since it is not supported yet by the driver. Moreover always enable NETDEV_XDP_ACT_NDO_XMIT as the ndo_xdp_xmit callback does not require to load a dummy xdp program on the NIC. Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Co-developed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 37 +++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 ++ 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 88460b7796e5..4276c6eb6820 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1243,6 +1243,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); +void mlx5e_set_xdp_feature(struct net_device *netdev); netdev_features_t mlx5e_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7708acc9b2ab..79fd21ecb9cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1985,6 +1985,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_params new_params; + int err; if (enable) { /* Checking the regular RQ here; mlx5e_validate_xsk_param called @@ -2005,7 +2006,14 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); mlx5e_set_rq_type(mdev, &new_params); - return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + if (err) + return err; + + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + + return 0; } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 76a9c5194a70..51b5f3cca504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4004,6 +4004,25 @@ static int mlx5e_handle_feature(struct net_device *netdev, return 0; } +void mlx5e_set_xdp_feature(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params *params = &priv->channels.params; + xdp_features_t val; + + if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { + xdp_clear_features_flag(netdev); + return; + } + + val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_NDO_XMIT; + if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) + val |= NETDEV_XDP_ACT_RX_SG; + xdp_set_features_flag(netdev, val); +} + int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t oper_features = features; @@ -4030,6 +4049,9 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) return -EINVAL; } + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + return 0; } @@ -4761,13 +4783,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) if (old_prog) bpf_prog_put(old_prog); - if (reset) { - if (prog) - xdp_features_set_redirect_target(netdev, true); - else - xdp_features_clear_redirect_target(netdev); - } - if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) goto unlock; @@ -5163,13 +5178,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_XSK_ZEROCOPY | - NETDEV_XDP_ACT_RX_SG; - netdev->priv_flags |= IFF_UNICAST_FLT; netif_set_tso_max_size(netdev, GSO_MAX_SIZE); + mlx5e_set_xdp_feature(netdev); mlx5e_set_netdev_dev_addr(netdev); mlx5e_macsec_build_netdev(priv); mlx5e_ipsec_build_netdev(priv); @@ -5241,6 +5253,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5e_health_create_reporters(priv); + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 9b9203443085..43fd12fb87b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -747,6 +747,9 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + /* update XDP supported features */ + mlx5e_set_xdp_feature(netdev); + /* CQ moderation params */ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); -- cgit From 481e96fc1307eb52c0c449608e629921ecbbaf15 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 9 Mar 2023 13:25:32 +0100 Subject: mvpp2: take care of xdp_features when reconfiguring queues XDP is supported only if enough queues are present, so when reconfiguring the queues set xdp_features accordingly. Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Suggested-by: Lorenzo Bianconi Signed-off-by: Matteo Croce Signed-off-by: Lorenzo Bianconi Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 9b4ecbe4f36d..3ea00bc9b91c 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4996,6 +4996,14 @@ static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu) for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; + if (percpu && port->ntxqs >= num_possible_cpus() * 2) + xdp_set_features_flag(port->dev, + NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT); + else + xdp_clear_features_flag(port->dev); + mvpp2_swf_bm_pool_init(port); if (status[i]) mvpp2_open(port->dev); @@ -6863,13 +6871,14 @@ static int mvpp2_port_probe(struct platform_device *pdev, if (!port->priv->percpu_pools) mvpp2_set_hw_csum(port, port->pool_long->id); + else if (port->ntxqs >= num_possible_cpus() * 2) + dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; dev->vlan_features |= features; netif_set_tso_max_segs(dev, MVPP2_MAX_TSO_SEGS); - dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - dev->priv_flags |= IFF_UNICAST_FLT; /* MTU range: 68 - 9704 */ -- cgit From b7a679ba7c652587b85294f4953f33ac0b756d40 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Mar 2023 15:49:57 +0100 Subject: mptcp: fix possible deadlock in subflow_error_report Christoph reported a possible deadlock while the TCP stack destroys an unaccepted subflow due to an incoming reset: the MPTCP socket error path tries to acquire the msk-level socket lock while TCP still owns the listener socket accept queue spinlock, and the reverse dependency already exists in the TCP stack. Note that the above is actually a lockdep false positive, as the chain involves two separate sockets. A different per-socket lockdep key will address the issue, but such a change will be quite invasive. Instead, we can simply stop earlier the socket error handling for orphaned or unaccepted subflows, breaking the critical lockdep chain. Error handling in such a scenario is a no-op. Reported-and-tested-by: Christoph Paasch Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/355 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4ae1a7304cf0..5070dc33675d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1432,6 +1432,13 @@ static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + /* bail early if this is a no-op, so that we avoid introducing a + * problematic lockdep dependency between TCP accept queue lock + * and msk socket spinlock + */ + if (!sk->sk_socket) + return; + mptcp_data_lock(sk); if (!sock_owned_by_user(sk)) __mptcp_error_report(sk); -- cgit From 3a236aef280ed5122b2d47087eb514d0921ae033 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Mar 2023 15:49:58 +0100 Subject: mptcp: refactor passive socket initialization After commit 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") unaccepted msk sockets go throu complete shutdown, we don't need anymore to delay inserting the first subflow into the subflow lists. The reference counting deserve some extra care, as __mptcp_close() is unaware of the request socket linkage to the first subflow. Please note that this is more a refactoring than a fix but because this modification is needed to include other corrections, see the following commits. Then a Fixes tag has been added here to help the stable team. Fixes: 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Tested-by: Christoph Paasch Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 17 ----------------- net/mptcp/subflow.c | 27 +++++++++++++++++++++------ 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ad9c46202fc..447641d34c2c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_socket && !ssk->sk_socket) mptcp_sock_graft(ssk, sk->sk_socket); - mptcp_propagate_sndbuf((struct sock *)msk, ssk); mptcp_sockopt_sync_locked(msk, ssk); return true; } @@ -3708,22 +3707,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, lock_sock(newsk); - /* PM/worker can now acquire the first subflow socket - * lock without racing with listener queue cleanup, - * we can notify it, if needed. - * - * Even if remote has reset the initial subflow by now - * the refcnt is still at least one. - */ - subflow = mptcp_subflow_ctx(msk->first); - list_add(&subflow->node, &msk->conn_list); - sock_hold(msk->first); - if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); - - mptcp_rcv_space_init(msk, msk->first); - mptcp_propagate_sndbuf(newsk, msk->first); - /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 5070dc33675d..a631a5e6fc7b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -397,6 +397,12 @@ void mptcp_subflow_reset(struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; + /* mptcp_mp_fail_no_response() can reach here on an already closed + * socket + */ + if (ssk->sk_state == TCP_CLOSE) + return; + /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); @@ -750,6 +756,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct mptcp_options_received mp_opt; bool fallback, fallback_is_fatal; struct sock *new_msk = NULL; + struct mptcp_sock *owner; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -824,6 +831,8 @@ create_child: ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { + owner = mptcp_sk(new_msk); + /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ @@ -832,14 +841,14 @@ create_child: /* record the newly created socket as the first msk * subflow, but don't link it yet into conn_list */ - WRITE_ONCE(mptcp_sk(new_msk)->first, child); + WRITE_ONCE(owner->first, child); /* new mpc subflow takes ownership of the newly * created mptcp socket */ mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; - mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); - mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); + mptcp_pm_new_connection(owner, child, 1); + mptcp_token_accept(subflow_req, owner); ctx->conn = new_msk; new_msk = NULL; @@ -847,15 +856,21 @@ create_child: * uses the correct data */ mptcp_copy_inaddrs(ctx->conn, child); + mptcp_propagate_sndbuf(ctx->conn, child); + + mptcp_rcv_space_init(owner, child); + list_add(&ctx->node, &owner->conn_list); + sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) + if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { mptcp_subflow_fully_established(ctx, &mp_opt); + mptcp_pm_fully_established(owner, child, GFP_ATOMIC); + ctx->pm_notified = 1; + } } else if (ctx->mp_join) { - struct mptcp_sock *owner; - owner = subflow_req->msk; if (!owner) { subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); -- cgit From b6985b9b82954caa53f862d6059d06c0526254f0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Mar 2023 15:49:59 +0100 Subject: mptcp: use the workqueue to destroy unaccepted sockets Christoph reported a UaF at token lookup time after having refactored the passive socket initialization part: BUG: KASAN: use-after-free in __token_bucket_busy+0x253/0x260 Read of size 4 at addr ffff88810698d5b0 by task syz-executor653/3198 CPU: 1 PID: 3198 Comm: syz-executor653 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x6e/0x91 print_report+0x16a/0x46f kasan_report+0xad/0x130 __token_bucket_busy+0x253/0x260 mptcp_token_new_connect+0x13d/0x490 mptcp_connect+0x4ed/0x860 __inet_stream_connect+0x80e/0xd90 tcp_sendmsg_fastopen+0x3ce/0x710 mptcp_sendmsg+0xff1/0x1a20 inet_sendmsg+0x11d/0x140 __sys_sendto+0x405/0x490 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc We need to properly clean-up all the paired MPTCP-level resources and be sure to release the msk last, even when the unaccepted subflow is destroyed by the TCP internals via inet_child_forget(). We can re-use the existing MPTCP_WORK_CLOSE_SUBFLOW infra, explicitly checking that for the critical scenario: the closed subflow is the MPC one, the msk is not accepted and eventually going through full cleanup. With such change, __mptcp_destroy_sock() is always called on msk sockets, even on accepted ones. We don't need anymore to transiently drop one sk reference at msk clone time. Please note this commit depends on the parent one: mptcp: refactor passive socket initialization Fixes: 58b09919626b ("mptcp: create msk early") Cc: stable@vger.kernel.org Reported-and-tested-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/347 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 40 ++++++++++++++++++++++++++++++---------- net/mptcp/protocol.h | 5 ++++- net/mptcp/subflow.c | 17 ++++++++++++----- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 447641d34c2c..2a2093d61835 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2342,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2350,7 +2349,20 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, * reference owned by msk; */ if (!inet_csk(ssk)->icsk_ulp_ops) { + WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD)); kfree_rcu(subflow, rcu); + } else if (msk->in_accept_queue && msk->first == ssk) { + /* if the first subflow moved to a close state, e.g. due to + * incoming reset and we reach here before inet_child_forget() + * the TCP stack could later try to close it via + * inet_csk_listen_stop(), or deliver it to the user space via + * accept(). + * We can't delete the subflow - or risk a double free - nor let + * the msk survive - or will be leaked in the non accept scenario: + * fallback and let TCP cope with the subflow cleanup. + */ + WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD)); + mptcp_subflow_drop_ctx(ssk); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ if (ssk->sk_state == TCP_LISTEN) { @@ -2398,9 +2410,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } -static void __mptcp_close_subflow(struct mptcp_sock *msk) +static void __mptcp_close_subflow(struct sock *sk) { struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); might_sleep(); @@ -2414,7 +2427,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (!skb_queue_empty_lockless(&ssk->sk_receive_queue)) continue; - mptcp_close_ssk((struct sock *)msk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); + } + + /* if the MPC subflow has been closed before the msk is accepted, + * msk will never be accept-ed, close it now + */ + if (!msk->first && msk->in_accept_queue) { + sock_set_flag(sk, SOCK_DEAD); + inet_sk_state_store(sk, TCP_CLOSE); } } @@ -2623,6 +2644,9 @@ static void mptcp_worker(struct work_struct *work) __mptcp_check_send_data_fin(sk); mptcp_check_data_fin(sk); + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(sk); + /* There is no point in keeping around an orphaned sk timedout or * closed, but we need the msk around to reply to incoming DATA_FIN, * even if it is orphaned and in FIN_WAIT2 state @@ -2638,9 +2662,6 @@ static void mptcp_worker(struct work_struct *work) } } - if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - __mptcp_close_subflow(msk); - if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); @@ -3078,6 +3099,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; msk->subflow = NULL; + msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); @@ -3095,8 +3117,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, security_inet_csk_clone(nsk, req); bh_unlock_sock(nsk); - /* keep a single reference */ - __sock_put(nsk); + /* note: the newly allocated socket refcount is 2 now */ return nsk; } @@ -3152,8 +3173,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, goto out; } - /* acquire the 2nd reference for the owning socket */ - sock_hold(new_mptcp_sock); newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); } else { @@ -3704,6 +3723,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct sock *newsk = newsock->sk; set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); + msk->in_accept_queue = 0; lock_sock(newsk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 61fd8eabfca2..3a2db1b862dd 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -295,7 +295,8 @@ struct mptcp_sock { u8 recvmsg_inq:1, cork:1, nodelay:1, - fastopening:1; + fastopening:1, + in_accept_queue:1; int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; @@ -666,6 +667,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); +void mptcp_subflow_drop_ctx(struct sock *ssk); + static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a631a5e6fc7b..932a3e0eb22d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -699,9 +699,10 @@ static bool subflow_hmac_valid(const struct request_sock *req, static void mptcp_force_close(struct sock *sk) { - /* the msk is not yet exposed to user-space */ + /* the msk is not yet exposed to user-space, and refcount is 2 */ inet_sk_state_store(sk, TCP_CLOSE); sk_common_release(sk); + sock_put(sk); } static void subflow_ulp_fallback(struct sock *sk, @@ -717,7 +718,7 @@ static void subflow_ulp_fallback(struct sock *sk, mptcp_subflow_ops_undo_override(sk); } -static void subflow_drop_ctx(struct sock *ssk) +void mptcp_subflow_drop_ctx(struct sock *ssk) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); @@ -823,7 +824,7 @@ create_child: if (new_msk) mptcp_copy_inaddrs(new_msk, child); - subflow_drop_ctx(child); + mptcp_subflow_drop_ctx(child); goto out; } @@ -914,7 +915,7 @@ out: return child; dispose_child: - subflow_drop_ctx(child); + mptcp_subflow_drop_ctx(child); tcp_rsk(req)->drop_req = true; inet_csk_prepare_for_destroy_sock(child); tcp_done(child); @@ -1866,7 +1867,6 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s struct sock *sk = (struct sock *)msk; bool do_cancel_work; - sock_hold(sk); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; msk->first = NULL; @@ -1954,6 +1954,13 @@ static void subflow_ulp_release(struct sock *ssk) * when the subflow is still unaccepted */ release = ctx->disposable || list_empty(&ctx->node); + + /* inet_child_forget() does not call sk_state_change(), + * explicitly trigger the socket close machinery + */ + if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, + &mptcp_sk(sk)->flags)) + mptcp_schedule_work(sk); sock_put(sk); } -- cgit From 0a3f4f1f9c27215e4ddcd312558342e57b93e518 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Mar 2023 15:50:00 +0100 Subject: mptcp: fix UaF in listener shutdown As reported by Christoph after having refactored the passive socket initialization, the mptcp listener shutdown path is prone to an UaF issue. BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x73/0xe0 Write of size 4 at addr ffff88810cb23098 by task syz-executor731/1266 CPU: 1 PID: 1266 Comm: syz-executor731 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x6e/0x91 print_report+0x16a/0x46f kasan_report+0xad/0x130 kasan_check_range+0x14a/0x1a0 _raw_spin_lock_bh+0x73/0xe0 subflow_error_report+0x6d/0x110 sk_error_report+0x3b/0x190 tcp_disconnect+0x138c/0x1aa0 inet_child_forget+0x6f/0x2e0 inet_csk_listen_stop+0x209/0x1060 __mptcp_close_ssk+0x52d/0x610 mptcp_destroy_common+0x165/0x640 mptcp_destroy+0x13/0x80 __mptcp_destroy_sock+0xe7/0x270 __mptcp_close+0x70e/0x9b0 mptcp_close+0x2b/0x150 inet_release+0xe9/0x1f0 __sock_release+0xd2/0x280 sock_close+0x15/0x20 __fput+0x252/0xa20 task_work_run+0x169/0x250 exit_to_user_mode_prepare+0x113/0x120 syscall_exit_to_user_mode+0x1d/0x40 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc The msk grace period can legitly expire in between the last reference count dropped in mptcp_subflow_queue_clean() and the later eventual access in inet_csk_listen_stop() After the previous patch we don't need anymore special-casing msk listener socket cleanup: the mptcp worker will process each of the unaccepted msk sockets. Just drop the now unnecessary code. Please note this commit depends on the two parent ones: mptcp: refactor passive socket initialization mptcp: use the workqueue to destroy unaccepted sockets Fixes: 6aeed9045071 ("mptcp: fix race on unaccepted mptcp sockets") Cc: stable@vger.kernel.org Reported-and-tested-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/346 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 7 ++--- net/mptcp/protocol.h | 1 - net/mptcp/subflow.c | 72 ---------------------------------------------------- 3 files changed, 2 insertions(+), 78 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2a2093d61835..60b23b2716c4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2365,12 +2365,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, mptcp_subflow_drop_ctx(ssk); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); + if (ssk->sk_state == TCP_LISTEN) mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } + __tcp_close(ssk, 0); /* close acquired an extra ref */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3a2db1b862dd..339a6f072989 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -629,7 +629,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); -void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 932a3e0eb22d..9c57575df84c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1826,78 +1826,6 @@ static void subflow_state_change(struct sock *sk) } } -void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) -{ - struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; - struct mptcp_sock *msk, *next, *head = NULL; - struct request_sock *req; - - /* build a list of all unaccepted mptcp sockets */ - spin_lock_bh(&queue->rskq_lock); - for (req = queue->rskq_accept_head; req; req = req->dl_next) { - struct mptcp_subflow_context *subflow; - struct sock *ssk = req->sk; - struct mptcp_sock *msk; - - if (!sk_is_mptcp(ssk)) - continue; - - subflow = mptcp_subflow_ctx(ssk); - if (!subflow || !subflow->conn) - continue; - - /* skip if already in list */ - msk = mptcp_sk(subflow->conn); - if (msk->dl_next || msk == head) - continue; - - msk->dl_next = head; - head = msk; - } - spin_unlock_bh(&queue->rskq_lock); - if (!head) - return; - - /* can't acquire the msk socket lock under the subflow one, - * or will cause ABBA deadlock - */ - release_sock(listener_ssk); - - for (msk = head; msk; msk = next) { - struct sock *sk = (struct sock *)msk; - bool do_cancel_work; - - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - next = msk->dl_next; - msk->first = NULL; - msk->dl_next = NULL; - - do_cancel_work = __mptcp_close(sk, 0); - release_sock(sk); - if (do_cancel_work) { - /* lockdep will report a false positive ABBA deadlock - * between cancel_work_sync and the listener socket. - * The involved locks belong to different sockets WRT - * the existing AB chain. - * Using a per socket key is problematic as key - * deregistration requires process context and must be - * performed at socket disposal time, in atomic - * context. - * Just tell lockdep to consider the listener socket - * released here. - */ - mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); - mptcp_cancel_work(sk); - mutex_acquire(&listener_sk->sk_lock.dep_map, - SINGLE_DEPTH_NESTING, 0, _RET_IP_); - } - sock_put(sk); - } - - /* we are still under the listener msk socket lock */ - lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); -} - static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); -- cgit From 840742b7ed0e123e47af9c5c3902746f3d6b64a2 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 9 Mar 2023 15:50:01 +0100 Subject: selftests: mptcp: userspace pm: fix printed values In case of errors, the printed message had the expected and the seen value inverted. This patch simply correct the order: first the expected value, then the one that has been seen. Fixes: 10d4273411be ("selftests: mptcp: userspace: print error details if any") Cc: stable@vger.kernel.org Acked-by: Geliang Tang Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 66c5be25c13d..48e52f995a98 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -240,7 +240,7 @@ check_expected_one() fi stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \ - "${var}" "${!var}" "${!exp}" + "${var}" "${!exp}" "${!var}" return 1 } -- cgit From 822467a48e938e661965d09df5fcac66f7291050 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 9 Mar 2023 15:50:02 +0100 Subject: mptcp: add ro_after_init for tcp{,v6}_prot_override Add __ro_after_init labels for the variables tcp_prot_override and tcpv6_prot_override, just like other variables adjacent to them, to indicate that they are initialised from the init hooks and no writes occur afterwards. Fixes: b19bc2945b40 ("mptcp: implement delegated actions") Cc: stable@vger.kernel.org Fixes: 51fa7f8ebf0e ("mptcp: mark ops structures as ro_after_init") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9c57575df84c..2aadc8733369 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -628,7 +628,7 @@ static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init; static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init; -static struct proto tcpv6_prot_override; +static struct proto tcpv6_prot_override __ro_after_init; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { @@ -926,7 +926,7 @@ dispose_child: } static struct inet_connection_sock_af_ops subflow_specific __ro_after_init; -static struct proto tcp_prot_override; +static struct proto tcp_prot_override __ro_after_init; enum mapping_status { MAPPING_OK, -- cgit From 3ba14528684f528566fb7d956bfbfb958b591d86 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 9 Mar 2023 15:50:03 +0100 Subject: mptcp: avoid setting TCP_CLOSE state twice tcp_set_state() is called from tcp_done() already. There is then no need to first set the state to TCP_CLOSE, then call tcp_done(). Fixes: d582484726c4 ("mptcp: fix fallback for MP_JOIN subflows") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/362 Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2aadc8733369..a0041360ee9d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -406,7 +406,6 @@ void mptcp_subflow_reset(struct sock *ssk) /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_set_state(ssk, TCP_CLOSE); tcp_send_active_reset(ssk, GFP_ATOMIC); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) && -- cgit From cee4034a3db1d30c3243dd51506a9d4ab1a849fa Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Mar 2023 15:50:04 +0100 Subject: mptcp: fix lockdep false positive in mptcp_pm_nl_create_listen_socket() Christoph reports a lockdep splat in the mptcp_subflow_create_socket() error path, when such function is invoked by mptcp_pm_nl_create_listen_socket(). Such code path acquires two separates, nested socket lock, with the internal lock operation lacking the "nested" annotation. Adding that in sock_release() for mptcp's sake only could be confusing. Instead just add a new lockclass to the in-kernel msk socket, re-initializing the lockdep infra after the socket creation. Fixes: ad2171009d96 ("mptcp: fix locking for in-kernel listener creation") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/354 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Tested-by: Christoph Paasch Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 56628b52d100..5c8dea49626c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -997,9 +997,13 @@ out: return ret; } +static struct lock_class_key mptcp_slock_keys[2]; +static struct lock_class_key mptcp_keys[2]; + static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { + bool is_ipv6 = sk->sk_family == AF_INET6; int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; struct socket *ssock; @@ -1016,6 +1020,18 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (!newsk) return -EINVAL; + /* The subflow socket lock is acquired in a nested to the msk one + * in several places, even by the TCP stack, and this msk is a kernel + * socket: lockdep complains. Instead of propagating the _nested + * modifiers in several places, re-init the lock class for the msk + * socket to an mptcp specific one. + */ + sock_lock_init_class_and_name(newsk, + is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", + &mptcp_slock_keys[is_ipv6], + is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", + &mptcp_keys[is_ipv6]); + lock_sock(newsk); ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); release_sock(newsk); -- cgit From cb090e64cf25602b9adaf32d5dfc9c8bec493cd1 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 10 Mar 2023 16:40:07 +0800 Subject: hwmon: (xgene) Fix use after free bug in xgene_hwmon_remove due to race condition In xgene_hwmon_probe, &ctx->workq is bound with xgene_hwmon_evt_work. Then it will be started. If we remove the driver which will call xgene_hwmon_remove to clean up, there may be unfinished work. The possible sequence is as follows: Fix it by finishing the work before cleanup in xgene_hwmon_remove. CPU0 CPU1 |xgene_hwmon_evt_work xgene_hwmon_remove | kfifo_free(&ctx->async_msg_fifo);| | |kfifo_out_spinlocked |//use &ctx->async_msg_fifo Fixes: 2ca492e22cb7 ("hwmon: (xgene) Fix crash when alarm occurs before driver probe") Signed-off-by: Zheng Wang Link: https://lore.kernel.org/r/20230310084007.1403388-1-zyytlz.wz@163.com Signed-off-by: Guenter Roeck --- drivers/hwmon/xgene-hwmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index 5cde837bfd09..d1abea49f01b 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -761,6 +761,7 @@ static int xgene_hwmon_remove(struct platform_device *pdev) { struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev); + cancel_work_sync(&ctx->workq); hwmon_device_unregister(ctx->hwmon_dev); kfifo_free(&ctx->async_msg_fifo); if (acpi_disabled) -- cgit From c93f5e2ab53243b17febabb9422a697017d3d49a Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Fri, 10 Mar 2023 08:50:35 +0100 Subject: hwmon: (ina3221) return prober error code ret is set to 0 which do not indicate an error. Return -EINVAL instead. Fixes: a9e9dd9c6de5 ("hwmon: (ina3221) Read channel input source info from DT") Signed-off-by: Marcus Folkesson Link: https://lore.kernel.org/r/20230310075035.246083-1-marcus.folkesson@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ina3221.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index e06186986444..f3a4c5633b1e 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -772,7 +772,7 @@ static int ina3221_probe_child_from_dt(struct device *dev, return ret; } else if (val > INA3221_CHANNEL3) { dev_err(dev, "invalid reg %d of %pOFn\n", val, child); - return ret; + return -EINVAL; } input = &ina->inputs[val]; -- cgit From 4783b9cb374af02d49740e00e2da19fd4ed6dec4 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 1 Mar 2023 22:14:20 +0000 Subject: x86/mce: Make sure logged MCEs are processed after sysfs update A recent change introduced a flag to queue up errors found during boot-time polling. These errors will be processed during late init once the MCE subsystem is fully set up. A number of sysfs updates call mce_restart() which goes through a subset of the CPU init flow. This includes polling MCA banks and logging any errors found. Since the same function is used as boot-time polling, errors will be queued. However, the system is now past late init, so the errors will remain queued until another error is found and the workqueue is triggered. Call mce_schedule_work() at the end of mce_restart() so that queued errors are processed. Fixes: 3bff147b187d ("x86/mce: Defer processing of early errors") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230301221420.2203184-1-yazen.ghannam@amd.com --- arch/x86/kernel/cpu/mce/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 7832a69d170e..2eec60f50057 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2355,6 +2355,7 @@ static void mce_restart(void) { mce_timer_delete_all(); on_each_cpu(mce_cpu_restart, NULL, 1); + mce_schedule_work(); } /* Toggle features for corrected errors */ -- cgit From 8d655e65237643c48ada2c131b83679bf1105373 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 12 Mar 2023 09:03:12 -0700 Subject: hwmon: (ucd90320) Add minimum delay between bus accesses When probing the ucd90320 access to some of the registers randomly fails. Sometimes it NACKs a transfer, sometimes it returns just random data and the PEC check fails. Experimentation shows that this seems to be triggered by a register access directly back to back with a previous register write. Experimentation also shows that inserting a small delay after register writes makes the issue go away. Use a similar solution to what the max15301 driver does to solve the same problem. Create a custom set of bus read and write functions that make sure that the delay is added. Fixes: a470f11c5ba2 ("hwmon: (pmbus/ucd9000) Add support for UCD90320 Power Sequencer") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20230312160312.2227405-1-lars@metafoo.de Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ucd9000.c | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c index 75fc770c9e40..3daaf2237832 100644 --- a/drivers/hwmon/pmbus/ucd9000.c +++ b/drivers/hwmon/pmbus/ucd9000.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include #include "pmbus.h" enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd90320, ucd9090, @@ -65,6 +67,7 @@ struct ucd9000_data { struct gpio_chip gpio; #endif struct dentry *debugfs; + ktime_t write_time; }; #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info) @@ -73,6 +76,73 @@ struct ucd9000_debugfs_entry { u8 index; }; +/* + * It has been observed that the UCD90320 randomly fails register access when + * doing another access right on the back of a register write. To mitigate this + * make sure that there is a minimum delay between a write access and the + * following access. The 250us is based on experimental data. At a delay of + * 200us the issue seems to go away. Add a bit of extra margin to allow for + * system to system differences. + */ +#define UCD90320_WAIT_DELAY_US 250 + +static inline void ucd90320_wait(const struct ucd9000_data *data) +{ + s64 delta = ktime_us_delta(ktime_get(), data->write_time); + + if (delta < UCD90320_WAIT_DELAY_US) + udelay(UCD90320_WAIT_DELAY_US - delta); +} + +static int ucd90320_read_word_data(struct i2c_client *client, int page, + int phase, int reg) +{ + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct ucd9000_data *data = to_ucd9000_data(info); + + if (reg >= PMBUS_VIRT_BASE) + return -ENXIO; + + ucd90320_wait(data); + return pmbus_read_word_data(client, page, phase, reg); +} + +static int ucd90320_read_byte_data(struct i2c_client *client, int page, int reg) +{ + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct ucd9000_data *data = to_ucd9000_data(info); + + ucd90320_wait(data); + return pmbus_read_byte_data(client, page, reg); +} + +static int ucd90320_write_word_data(struct i2c_client *client, int page, + int reg, u16 word) +{ + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct ucd9000_data *data = to_ucd9000_data(info); + int ret; + + ucd90320_wait(data); + ret = pmbus_write_word_data(client, page, reg, word); + data->write_time = ktime_get(); + + return ret; +} + +static int ucd90320_write_byte(struct i2c_client *client, int page, u8 value) +{ + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct ucd9000_data *data = to_ucd9000_data(info); + int ret; + + ucd90320_wait(data); + ret = pmbus_write_byte(client, page, value); + data->write_time = ktime_get(); + + return ret; +} + static int ucd9000_get_fan_config(struct i2c_client *client, int fan) { int fan_config = 0; @@ -598,6 +668,11 @@ static int ucd9000_probe(struct i2c_client *client) info->read_byte_data = ucd9000_read_byte_data; info->func[0] |= PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12 | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34; + } else if (mid->driver_data == ucd90320) { + info->read_byte_data = ucd90320_read_byte_data; + info->read_word_data = ucd90320_read_word_data; + info->write_byte = ucd90320_write_byte; + info->write_word_data = ucd90320_write_word_data; } ucd9000_probe_gpio(client, mid, data); -- cgit From 00d85e81796b17a29a0e096c5a4735daa47adef8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 12 Mar 2023 20:37:23 +0100 Subject: hwmon: tmp512: drop of_match_ptr for ID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will match mostly by DT table (even thought there is regular ID table) so there is little benefit in of_match_ptr (this also allows ACPI matching via PRP0001, even though it might not be relevant here). This also fixes !CONFIG_OF error: drivers/hwmon/tmp513.c:610:34: error: ‘tmp51x_of_match’ defined but not used [-Werror=unused-const-variable=] Fixes: 59dfa75e5d82 ("hwmon: Add driver for Texas Instruments TMP512/513 sensor chips.") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230312193723.478032-2-krzysztof.kozlowski@linaro.org Signed-off-by: Guenter Roeck --- drivers/hwmon/tmp513.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c index 47bbe47e062f..7d5f7441aceb 100644 --- a/drivers/hwmon/tmp513.c +++ b/drivers/hwmon/tmp513.c @@ -758,7 +758,7 @@ static int tmp51x_probe(struct i2c_client *client) static struct i2c_driver tmp51x_driver = { .driver = { .name = "tmp51x", - .of_match_table = of_match_ptr(tmp51x_of_match), + .of_match_table = tmp51x_of_match, }, .probe_new = tmp51x_probe, .id_table = tmp51x_id, -- cgit From 06615d11cc78162dfd5116efb71f29eb29502d37 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sun, 12 Mar 2023 01:46:50 +0800 Subject: power: supply: da9150: Fix use after free bug in da9150_charger_remove due to race condition In da9150_charger_probe, &charger->otg_work is bound with da9150_charger_otg_work. da9150_charger_otg_ncb may be called to start the work. If we remove the module which will call da9150_charger_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: Fix it by canceling the work before cleanup in the da9150_charger_remove CPU0 CPUc1 |da9150_charger_otg_work da9150_charger_remove | power_supply_unregister | device_unregister | power_supply_dev_release| kfree(psy) | | | power_supply_changed(charger->usb); | //use Fixes: c1a281e34dae ("power: Add support for DA9150 Charger") Signed-off-by: Zheng Wang Signed-off-by: Sebastian Reichel --- drivers/power/supply/da9150-charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/da9150-charger.c b/drivers/power/supply/da9150-charger.c index 14da5c595dd9..a87aeaea38e1 100644 --- a/drivers/power/supply/da9150-charger.c +++ b/drivers/power/supply/da9150-charger.c @@ -657,6 +657,7 @@ static int da9150_charger_remove(struct platform_device *pdev) if (!IS_ERR_OR_NULL(charger->usb_phy)) usb_unregister_notifier(charger->usb_phy, &charger->otg_nb); + cancel_work_sync(&charger->otg_work); power_supply_unregister(charger->battery); power_supply_unregister(charger->usb); -- cgit From 5a522150093a0eabae9470a70a37a6e436bfad08 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 1 Mar 2023 22:02:01 +0530 Subject: vhost-vdpa: free iommu domain after last use during cleanup Currently vhost_vdpa_cleanup() unmaps the DMA mappings by calling `iommu_unmap(v->domain, map->start, map->size);` from vhost_vdpa_general_unmap() when the parent vDPA driver doesn't provide DMA config operations. However, the IOMMU domain referred to by `v->domain` is freed in vhost_vdpa_free_domain() before vhost_vdpa_cleanup() in vhost_vdpa_release() which results in NULL pointer de-reference. Accordingly, moving the call to vhost_vdpa_free_domain() in vhost_vdpa_cleanup() would makes sense. This will also help detaching the dma device in error handling of vhost_vdpa_alloc_domain(). This issue was observed on terminating QEMU with SIGQUIT. Fixes: 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release") Signed-off-by: Gautam Dawar Message-Id: <20230301163203.29883-1-gautam.dawar@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index dc12dbd5b43b..7be9d9d8f01c 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1169,6 +1169,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) err_attach: iommu_domain_free(v->domain); + v->domain = NULL; return ret; } @@ -1213,6 +1214,7 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v) vhost_vdpa_remove_as(v, asid); } + vhost_vdpa_free_domain(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); } @@ -1285,7 +1287,6 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_clean_irq(v); vhost_vdpa_reset(v); vhost_dev_stop(&v->vdev); - vhost_vdpa_free_domain(v); vhost_vdpa_config_put(v); vhost_vdpa_cleanup(v); mutex_unlock(&d->mutex); -- cgit From b4cca6d48eb3fa6f0d9caba4329b1a2b0ff67a77 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Thu, 2 Mar 2023 19:18:57 +0100 Subject: vdpa_sim: set last_used_idx as last_avail_idx in vdpasim_queue_ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from an used_idx different than 0 is needed in use cases like virtual machine migration. Not doing so and letting the caller set an avail idx different than 0 causes destination device to try to use old buffers that source driver already recover and are not available anymore. Since vdpa_sim does not support receive inflight descriptors as a destination of a migration, let's set both avail_idx and used_idx the same at vq start. This is how vhost-user works in a VHOST_SET_VRING_BASE call. Although the simple fix is to set last_used_idx at vdpasim_set_vq_state, it would be reset at vdpasim_queue_ready. The last_avail_idx case is fixed with commit 0e84f918fac8 ("vdpa_sim: not reset state in vdpasim_queue_ready"). Since the only option is to make it equal to last_avail_idx, adding the only change needed here. This was discovered and tested live migrating the vdpa_sim_net device. Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator") Reviewed-by: Stefano Garzarella Signed-off-by: Eugenio Pérez Message-Id: <20230302181857.925374-1-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 6a0a65814626..eea23c630f7c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -68,6 +68,17 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) (uintptr_t)vq->device_addr); vq->vring.last_avail_idx = last_avail_idx; + + /* + * Since vdpa_sim does not support receive inflight descriptors as a + * destination of a migration, let's set both avail_idx and used_idx + * the same at vq start. This is how vhost-user works in a + * VHOST_SET_VRING_BASE call. + * + * Although the simple fix is to set last_used_idx at + * vdpasim_set_vq_state, it would be reset at vdpasim_queue_ready. + */ + vq->vring.last_used_idx = last_avail_idx; vq->vring.notify = vdpasim_vq_notify; } -- cgit From ae43c20da2a77c508715a9c77845b4e87e6a1e25 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Thu, 9 Mar 2023 14:13:07 +0800 Subject: tools/virtio: Ignore virtio-trace/trace-agent since commit 108fc82596e3("tools: Add guest trace agent as a user tool") introduce virtio-trace/trace-agent, it should be ignored in the git tree. Signed-off-by: Rong Tao Message-Id: Signed-off-by: Michael S. Tsirkin --- tools/virtio/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore index 075588c4da08..9934d48d9a55 100644 --- a/tools/virtio/.gitignore +++ b/tools/virtio/.gitignore @@ -2,3 +2,4 @@ *.d virtio_test vringh_test +virtio-trace/trace-agent -- cgit From a52e5cdbe8016d4e3e6322fd93d71afddb9a5af9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 7 Mar 2023 14:35:23 +0100 Subject: s390/ipl: add missing intersection check to ipl_report handling The code which handles the ipl report is searching for a free location in memory where it could copy the component and certificate entries to. It checks for intersection between the sections required for the kernel and the component/certificate data area, but fails to check whether the data structures linking these data areas together intersect. This might cause the iplreport copy code to overwrite the iplreport itself. Fix this by adding two addtional intersection checks. Cc: Fixes: 9641b8cc733f ("s390/ipl: read IPL report at early boot") Signed-off-by: Sven Schnelle Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_report.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 9b14045065b6..74b5cd264862 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -57,11 +57,19 @@ repeat: if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && intersects(initrd_data.start, initrd_data.size, safe_addr, size)) safe_addr = initrd_data.start + initrd_data.size; + if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) { + safe_addr = (unsigned long)comps + comps->len; + goto repeat; + } for_each_rb_entry(comp, comps) if (intersects(safe_addr, size, comp->addr, comp->len)) { safe_addr = comp->addr + comp->len; goto repeat; } + if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) { + safe_addr = (unsigned long)certs + certs->len; + goto repeat; + } for_each_rb_entry(cert, certs) if (intersects(safe_addr, size, cert->addr, cert->len)) { safe_addr = cert->addr + cert->len; -- cgit From ab909509850b27fd39b8ba99e44cda39dbc3858c Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 6 Mar 2023 16:10:11 +0100 Subject: PCI: s390: Fix use-after-free of PCI resources with per-function hotplug On s390 PCI functions may be hotplugged individually even when they belong to a multi-function device. In particular on an SR-IOV device VFs may be removed and later re-added. In commit a50297cf8235 ("s390/pci: separate zbus creation from scanning") it was missed however that struct pci_bus and struct zpci_bus's resource list retained a reference to the PCI functions MMIO resources even though those resources are released and freed on hot-unplug. These stale resources may subsequently be claimed when the PCI function re-appears resulting in use-after-free. One idea of fixing this use-after-free in s390 specific code that was investigated was to simply keep resources around from the moment a PCI function first appeared until the whole virtual PCI bus created for a multi-function device disappears. The problem with this however is that due to the requirement of artificial MMIO addreesses (address cookies) extra logic is then needed to keep the address cookies compatible on re-plug. At the same time the MMIO resources semantically belong to the PCI function so tying their lifecycle to the function seems more logical. Instead a simpler approach is to remove the resources of an individually hot-unplugged PCI function from the PCI bus's resource list while keeping the resources of other PCI functions on the PCI bus untouched. This is done by introducing pci_bus_remove_resource() to remove an individual resource. Similarly the resource also needs to be removed from the struct zpci_bus's resource list. It turns out however, that there is really no need to add the MMIO resources to the struct zpci_bus's resource list at all and instead we can simply use the zpci_bar_struct's resource pointer directly. Fixes: a50297cf8235 ("s390/pci: separate zbus creation from scanning") Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230306151014.60913-2-schnelle@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci.c | 16 ++++++++++------ arch/s390/pci/pci_bus.c | 12 +++++------- arch/s390/pci/pci_bus.h | 3 +-- drivers/pci/bus.c | 21 +++++++++++++++++++++ include/linux/pci.h | 1 + 5 files changed, 38 insertions(+), 15 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index ef38b1514c77..e16afacc8fd1 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -544,8 +544,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, return r; } -int zpci_setup_bus_resources(struct zpci_dev *zdev, - struct list_head *resources) +int zpci_setup_bus_resources(struct zpci_dev *zdev) { unsigned long addr, size, flags; struct resource *res; @@ -581,7 +580,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev, return -ENOMEM; } zdev->bars[i].res = res; - pci_add_resource(resources, res); } zdev->has_resources = 1; @@ -590,17 +588,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev, static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) { + struct resource *res; int i; + pci_lock_rescan_remove(); for (i = 0; i < PCI_STD_NUM_BARS; i++) { - if (!zdev->bars[i].size || !zdev->bars[i].res) + res = zdev->bars[i].res; + if (!res) continue; + release_resource(res); + pci_bus_remove_resource(zdev->zbus->bus, res); zpci_free_iomap(zdev, zdev->bars[i].map_idx); - release_resource(zdev->bars[i].res); - kfree(zdev->bars[i].res); + zdev->bars[i].res = NULL; + kfree(res); } zdev->has_resources = 0; + pci_unlock_rescan_remove(); } int pcibios_device_add(struct pci_dev *pdev) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 6a8da1b742ae..a99926af2b69 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -41,9 +41,7 @@ static int zpci_nb_devices; */ static int zpci_bus_prepare_device(struct zpci_dev *zdev) { - struct resource_entry *window, *n; - struct resource *res; - int rc; + int rc, i; if (!zdev_enabled(zdev)) { rc = zpci_enable_device(zdev); @@ -57,10 +55,10 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) } if (!zdev->has_resources) { - zpci_setup_bus_resources(zdev, &zdev->zbus->resources); - resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) { - res = window->res; - pci_bus_add_resource(zdev->zbus->bus, res, 0); + zpci_setup_bus_resources(zdev); + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (zdev->bars[i].res) + pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0); } } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index e96c9860e064..af9f0ac79a1b 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -30,8 +30,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev) int zpci_alloc_domain(int domain); void zpci_free_domain(int domain); -int zpci_setup_bus_resources(struct zpci_dev *zdev, - struct list_head *resources); +int zpci_setup_bus_resources(struct zpci_dev *zdev); static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus, unsigned int devfn) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 83ae838ceb5f..549c4bd5caec 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -76,6 +76,27 @@ struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n) } EXPORT_SYMBOL_GPL(pci_bus_resource_n); +void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res) +{ + struct pci_bus_resource *bus_res, *tmp; + int i; + + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + if (bus->resource[i] == res) { + bus->resource[i] = NULL; + return; + } + } + + list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) { + if (bus_res->res == res) { + list_del(&bus_res->list); + kfree(bus_res); + return; + } + } +} + void pci_bus_remove_resources(struct pci_bus *bus) { int i; diff --git a/include/linux/pci.h b/include/linux/pci.h index fafd8020c6d7..b50e5c79f7e3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1438,6 +1438,7 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, unsigned int flags); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); void pci_bus_remove_resources(struct pci_bus *bus); +void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res); int devm_request_pci_bus_resources(struct device *dev, struct list_head *resources); -- cgit From d7a0bdbf17276b757d2b89f5351bbee9ecf58fe6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Mar 2023 14:19:55 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 13 ++++--------- arch/s390/configs/defconfig | 12 +++--------- arch/s390/configs/zfcpdump_defconfig | 2 +- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 3c68fe49042c..4ccf66d29fc2 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -23,7 +23,6 @@ CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y @@ -90,7 +89,6 @@ CONFIG_MINIX_SUBPARTITION=y CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y CONFIG_ZSMALLOC_STAT=y @@ -298,7 +296,6 @@ CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_TARGET_MASQUERADE=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m @@ -340,7 +337,6 @@ CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_HFSC=m CONFIG_NET_SCH_PRIO=m @@ -351,7 +347,6 @@ CONFIG_NET_SCH_SFQ=m CONFIG_NET_SCH_TEQL=m CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_DRR=m CONFIG_NET_SCH_MQPRIO=m @@ -363,14 +358,11 @@ CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m CONFIG_CLS_U32_PERF=y CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m @@ -584,7 +576,7 @@ CONFIG_DIAG288_WATCHDOG=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -# CONFIG_HID is not set +# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m @@ -828,6 +820,7 @@ CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_TEST_LOCKUP=m +CONFIG_DEBUG_PREEMPT=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_ATOMIC_SLEEP=y @@ -843,6 +836,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300 # CONFIG_RCU_TRACE is not set CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y +CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_IRQSOFF_TRACER=y @@ -857,6 +851,7 @@ CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m +CONFIG_SAMPLE_FTRACE_OPS=m CONFIG_DEBUG_ENTRY=y CONFIG_CIO_INJECT=y CONFIG_KUNIT=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 9ab91632f74c..693297a2e897 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -21,7 +21,6 @@ CONFIG_NUMA_BALANCING=y CONFIG_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y @@ -85,7 +84,6 @@ CONFIG_MINIX_SUBPARTITION=y CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y -CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y CONFIG_ZSMALLOC_STAT=y @@ -289,7 +287,6 @@ CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_TARGET_MASQUERADE=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m @@ -330,7 +327,6 @@ CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_HFSC=m CONFIG_NET_SCH_PRIO=m @@ -341,7 +337,6 @@ CONFIG_NET_SCH_SFQ=m CONFIG_NET_SCH_TEQL=m CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_NETEM=m CONFIG_NET_SCH_DRR=m CONFIG_NET_SCH_MQPRIO=m @@ -353,14 +348,11 @@ CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_PLUG=m CONFIG_NET_SCH_ETS=m CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m CONFIG_CLS_U32_PERF=y CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m @@ -573,7 +565,7 @@ CONFIG_DIAG288_WATCHDOG=m CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -# CONFIG_HID is not set +# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_ACCESS=m @@ -795,6 +787,7 @@ CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y +CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y @@ -805,6 +798,7 @@ CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m +CONFIG_SAMPLE_FTRACE_OPS=m CONFIG_KUNIT=m CONFIG_KUNIT_DEBUGFS=y CONFIG_LKDTM=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index a9c0c81d1de9..33a232bb68af 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -58,7 +58,7 @@ CONFIG_ZFCP=y # CONFIG_VMCP is not set # CONFIG_MONWRITER is not set # CONFIG_S390_VMUR is not set -# CONFIG_HID is not set +# CONFIG_HID_SUPPORT is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set -- cgit From ee892ea83d99610fa33bea612de058e0955eec3a Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 3 Feb 2023 07:53:09 -0800 Subject: drm/i915/hwmon: Enable PL1 power limit Previous documentation suggested that PL1 power limit is always enabled. However we now find this not to be the case on some platforms (such as ATSM). Therefore enable PL1 power limit during hwmon initialization. Bspec: 51864 v2: Add Bspec reference (Gwan-gyeong) v3: Add Fixes tag Fixes: 99f55efb79114 ("drm/i915/hwmon: Power PL1 limit and TDP setting") Signed-off-by: Ashutosh Dixit Reviewed-by: Gwan-gyeong Mun Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230203155309.1042297-1-ashutosh.dixit@intel.com (cherry picked from commit 0349c41b05968befaffa5fbb7e73d0ee6004f610) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_hwmon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 1225bc432f0d..4683a5b96eff 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(&hwmon->ddat_gt[i], &energy); } + + /* Enable PL1 power limit */ + if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, + PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- cgit From 897f453c106380e57600c19a0a0485ceb4f3b0d9 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Feb 2023 17:17:30 +0200 Subject: drm/i915: Fix audio ELD handling for DP MST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I forgot to call intel_audio_compute_config() on DP MST, which means ELD doesn't get populated and passed to the audio driver. References: https://gitlab.freedesktop.org/drm/intel/-/issues/8097 Fixes: 5d986635e296 ("drm/i915/audio: Precompute the ELD") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230220151731.6852-1-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar (cherry picked from commit 518b761a7b0e2bb2fac2518f041c71b461adf761) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 054a009e800d..2106b3de225a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -265,6 +265,19 @@ static int intel_dp_mst_update_slots(struct intel_encoder *encoder, return 0; } +static bool intel_dp_mst_has_audio(const struct drm_connector_state *conn_state) +{ + const struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); + struct intel_connector *connector = + to_intel_connector(conn_state->connector); + + if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) + return connector->port->has_audio; + else + return intel_conn_state->force_audio == HDMI_AUDIO_ON; +} + static int intel_dp_mst_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -272,10 +285,6 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder); struct intel_dp *intel_dp = &intel_mst->primary->dp; - struct intel_connector *connector = - to_intel_connector(conn_state->connector); - struct intel_digital_connector_state *intel_conn_state = - to_intel_digital_connector_state(conn_state); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; struct link_config_limits limits; @@ -287,11 +296,9 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->has_pch_encoder = false; - if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO) - pipe_config->has_audio = connector->port->has_audio; - else - pipe_config->has_audio = - intel_conn_state->force_audio == HDMI_AUDIO_ON; + pipe_config->has_audio = + intel_dp_mst_has_audio(conn_state) && + intel_audio_compute_config(encoder, pipe_config, conn_state); /* * for MST we always configure max link bw - the spec doesn't -- cgit From 71c602103c74b277bef3d20a308874a33ec8326d Mon Sep 17 00:00:00 2001 From: Jouni Högander Date: Tue, 21 Feb 2023 10:53:04 +0200 Subject: drm/i915/psr: Use calculated io and fast wake lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we are using hardcoded 7 for io and fast wake lines. According to Bspec io and fast wake times are both 42us for DISPLAY_VER >= 12 and 50us and 32us for older platforms. Calculate line counts for these and configure them into PSR2_CTL accordingly Use 45 us for the fast wake calculation as 42 seems to be too tight based on testing. Bspec: 49274, 4289 Cc: Mika Kahola Cc: José Roberto de Souza Fixes: 64cf40a125ff ("drm/i915/psr: Program default IO buffer Wake and Fast Wake") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7725 Signed-off-by: Jouni Högander Reviewed-by: Stanislav Lisovskiy Link: https://patchwork.freedesktop.org/patch/msgid/20230221085304.3382297-1-jouni.hogander@intel.com (cherry picked from commit cb42e8ede5b475c096e473b86c356b1158b4bc3b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_types.h | 2 + drivers/gpu/drm/i915/display/intel_psr.c | 78 +++++++++++++++++----- 2 files changed, 63 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 54c517ca9632..582234f0c49a 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1631,6 +1631,8 @@ struct intel_psr { bool psr2_sel_fetch_cff_enabled; bool req_psr2_sdp_prior_scanline; u8 sink_sync_latency; + u8 io_wake_lines; + u8 fast_wake_lines; ktime_t last_entry_attempt; ktime_t last_exit; bool sink_not_reliable; diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 7a72e15e6836..9f1a0bebae24 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -542,6 +542,14 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_PSR2_FRAME_BEFORE_SU(max_t(u8, intel_dp->psr.sink_sync_latency + 1, 2)); val |= intel_psr2_get_tp_time(intel_dp); + if (DISPLAY_VER(dev_priv) >= 12) { + if (intel_dp->psr.io_wake_lines < 9 && + intel_dp->psr.fast_wake_lines < 9) + val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2; + else + val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_3; + } + /* Wa_22012278275:adl-p */ if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_E0)) { static const u8 map[] = { @@ -558,31 +566,21 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * Still using the default IO_BUFFER_WAKE and FAST_WAKE, see * comments bellow for more information */ - u32 tmp, lines = 7; - - val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2; + u32 tmp; - tmp = map[lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES]; + tmp = map[intel_dp->psr.io_wake_lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES]; tmp = tmp << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT; val |= tmp; - tmp = map[lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES]; + tmp = map[intel_dp->psr.fast_wake_lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES]; tmp = tmp << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT; val |= tmp; } else if (DISPLAY_VER(dev_priv) >= 12) { - /* - * TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default - * values from BSpec. In order to setting an optimal power - * consumption, lower than 4k resolution mode needs to decrease - * IO_BUFFER_WAKE and FAST_WAKE. And higher than 4K resolution - * mode needs to increase IO_BUFFER_WAKE and FAST_WAKE. - */ - val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2; - val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(7); - val |= TGL_EDP_PSR2_FAST_WAKE(7); + val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines); + val |= TGL_EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines); } else if (DISPLAY_VER(dev_priv) >= 9) { - val |= EDP_PSR2_IO_BUFFER_WAKE(7); - val |= EDP_PSR2_FAST_WAKE(7); + val |= EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines); + val |= EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines); } if (intel_dp->psr.req_psr2_sdp_prior_scanline) @@ -842,6 +840,46 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d return true; } +static bool _compute_psr2_wake_times(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + int io_wake_lines, io_wake_time, fast_wake_lines, fast_wake_time; + u8 max_wake_lines; + + if (DISPLAY_VER(i915) >= 12) { + io_wake_time = 42; + /* + * According to Bspec it's 42us, but based on testing + * it is not enough -> use 45 us. + */ + fast_wake_time = 45; + max_wake_lines = 12; + } else { + io_wake_time = 50; + fast_wake_time = 32; + max_wake_lines = 8; + } + + io_wake_lines = intel_usecs_to_scanlines( + &crtc_state->uapi.adjusted_mode, io_wake_time); + fast_wake_lines = intel_usecs_to_scanlines( + &crtc_state->uapi.adjusted_mode, fast_wake_time); + + if (io_wake_lines > max_wake_lines || + fast_wake_lines > max_wake_lines) + return false; + + if (i915->params.psr_safest_params) + io_wake_lines = fast_wake_lines = max_wake_lines; + + /* According to Bspec lower limit should be set as 7 lines. */ + intel_dp->psr.io_wake_lines = max(io_wake_lines, 7); + intel_dp->psr.fast_wake_lines = max(fast_wake_lines, 7); + + return true; +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -936,6 +974,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + if (!_compute_psr2_wake_times(intel_dp, crtc_state)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, Unable to use long enough wake times\n"); + return false; + } + if (HAS_PSR2_SEL_FETCH(dev_priv)) { if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) && !HAS_PSR_HW_TRACKING(dev_priv)) { -- cgit From 46bc23dcd94569270d02c4c1f7e62ae01ebd53bb Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Thu, 23 Feb 2023 10:06:19 +0530 Subject: drm/i915/dg2: Add HDMI pixel clock frequencies 267.30 and 319.89 MHz Add snps phy table values for HDMI pixel clocks 267.30 MHz and 319.89 MHz. Values are based on the Bspec algorithm for PLL programming for HDMI. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8008 Signed-off-by: Ankit Nautiyal Reviewed-by: Uma Shankar Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20230223043619.3941382-1-ankit.k.nautiyal@intel.com (cherry picked from commit d46746b8b13cbd377ffc733e465d25800459a31b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_snps_phy.c | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c index c65c771f5c46..1cfb94b5cedb 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_phy.c +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -1419,6 +1419,36 @@ static const struct intel_mpllb_state dg2_hdmi_262750 = { REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), }; +static const struct intel_mpllb_state dg2_hdmi_267300 = { + .clock = 267300, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 74) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 30146) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 36699), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + static const struct intel_mpllb_state dg2_hdmi_268500 = { .clock = 268500, .ref_control = @@ -1509,6 +1539,36 @@ static const struct intel_mpllb_state dg2_hdmi_241500 = { REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), }; +static const struct intel_mpllb_state dg2_hdmi_319890 = { + .clock = 319890, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 94) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 64094) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13631), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + static const struct intel_mpllb_state dg2_hdmi_497750 = { .clock = 497750, .ref_control = @@ -1696,8 +1756,10 @@ static const struct intel_mpllb_state * const dg2_hdmi_tables[] = { &dg2_hdmi_209800, &dg2_hdmi_241500, &dg2_hdmi_262750, + &dg2_hdmi_267300, &dg2_hdmi_268500, &dg2_hdmi_296703, + &dg2_hdmi_319890, &dg2_hdmi_497750, &dg2_hdmi_592000, &dg2_hdmi_593407, -- cgit From 193c41926d152761764894f46e23b53c00186a82 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 20 Feb 2023 18:18:58 +0100 Subject: drm/i915/sseu: fix max_subslices array-index-out-of-bounds access It seems that commit bc3c5e0809ae ("drm/i915/sseu: Don't try to store EU mask internally in UAPI format") exposed a potential out-of-bounds access, reported by UBSAN as following on a laptop with a gen 11 i915 card: UBSAN: array-index-out-of-bounds in drivers/gpu/drm/i915/gt/intel_sseu.c:65:27 index 6 is out of range for type 'u16 [6]' CPU: 2 PID: 165 Comm: systemd-udevd Not tainted 6.2.0-9-generic #9-Ubuntu Hardware name: Dell Inc. XPS 13 9300/077Y9N, BIOS 1.11.0 03/22/2022 Call Trace: show_stack+0x4e/0x61 dump_stack_lvl+0x4a/0x6f dump_stack+0x10/0x18 ubsan_epilogue+0x9/0x3a __ubsan_handle_out_of_bounds.cold+0x42/0x47 gen11_compute_sseu_info+0x121/0x130 [i915] intel_sseu_info_init+0x15d/0x2b0 [i915] intel_gt_init_mmio+0x23/0x40 [i915] i915_driver_mmio_probe+0x129/0x400 [i915] ? intel_gt_probe_all+0x91/0x2e0 [i915] i915_driver_probe+0xe1/0x3f0 [i915] ? drm_privacy_screen_get+0x16d/0x190 [drm] ? acpi_dev_found+0x64/0x80 i915_pci_probe+0xac/0x1b0 [i915] ... According to the definition of sseu_dev_info, eu_mask->hsw is limited to a maximum of GEN_MAX_SS_PER_HSW_SLICE (6) sub-slices, but gen11_sseu_info_init() can potentially set 8 sub-slices, in the !IS_JSL_EHL(gt->i915) case. Fix this by reserving up to 8 slots for max_subslices in the eu_mask struct. Reported-by: Emil Renner Berthing Signed-off-by: Andrea Righi Fixes: bc3c5e0809ae ("drm/i915/sseu: Don't try to store EU mask internally in UAPI format") Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230220171858.131416-1-andrea.righi@canonical.com (cherry picked from commit 3cba09a6ac86ea1d456909626eb2685596c07822) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index aa87d3832d60..d7e8c374f153 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -27,7 +27,7 @@ struct drm_printer; * is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the * I915_MAX_SS_FUSE_BITS value below). */ -#define GEN_MAX_SS_PER_HSW_SLICE 6 +#define GEN_MAX_SS_PER_HSW_SLICE 8 /* * Maximum number of 32-bit registers used by hardware to express the -- cgit From e0e6b416b25ee14716f3549e0cbec1011b193809 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 2 Mar 2023 13:08:20 +0100 Subject: drm/i915/active: Fix misuse of non-idle barriers as fence trackers Users reported oopses on list corruptions when using i915 perf with a number of concurrently running graphics applications. Root cause analysis pointed at an issue in barrier processing code -- a race among perf open / close replacing active barriers with perf requests on kernel context and concurrent barrier preallocate / acquire operations performed during user context first pin / last unpin. When adding a request to a composite tracker, we try to reuse an existing fence tracker, already allocated and registered with that composite. The tracker we obtain may already track another fence, may be an idle barrier, or an active barrier. If the tracker we get occurs a non-idle barrier then we try to delete that barrier from a list of barrier tasks it belongs to. However, while doing that we don't respect return value from a function that performs the barrier deletion. Should the deletion ever fail, we would end up reusing the tracker still registered as a barrier task. Since the same structure field is reused with both fence callback lists and barrier tasks list, list corruptions would likely occur. Barriers are now deleted from a barrier tasks list by temporarily removing the list content, traversing that content with skip over the node to be deleted, then populating the list back with the modified content. Should that intentionally racy concurrent deletion attempts be not serialized, one or more of those may fail because of the list being temporary empty. Related code that ignores the results of barrier deletion was initially introduced in v5.4 by commit d8af05ff38ae ("drm/i915: Allow sharing the idle-barrier from other kernel requests"). However, all users of the barrier deletion routine were apparently serialized at that time, then the issue didn't exhibit itself. Results of git bisect with help of a newly developed igt@gem_barrier_race@remote-request IGT test indicate that list corruptions might start to appear after commit 311770173fac ("drm/i915/gt: Schedule request retirement when timeline idles"), introduced in v5.5. Respect results of barrier deletion attempts -- mark the barrier as idle only if successfully deleted from the list. Then, before proceeding with setting our fence as the one currently tracked, make sure that the tracker we've got is not a non-idle barrier. If that check fails then don't use that tracker but go back and try to acquire a new, usable one. v3: use unlikely() to document what outcome we expect (Andi), - fix bad grammar in commit description. v2: no code changes, - blame commit 311770173fac ("drm/i915/gt: Schedule request retirement when timeline idles"), v5.5, not commit d8af05ff38ae ("drm/i915: Allow sharing the idle-barrier from other kernel requests"), v5.4, - reword commit description. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6333 Fixes: 311770173fac ("drm/i915/gt: Schedule request retirement when timeline idles") Cc: Chris Wilson Cc: stable@vger.kernel.org # v5.5 Cc: Andi Shyti Signed-off-by: Janusz Krzysztofik Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230302120820.48740-1-janusz.krzysztofik@linux.intel.com (cherry picked from commit 506006055769b10d1b2b4e22f636f3b45e0e9fc7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_active.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 7412abf166a8..a9fea115f2d2 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -422,12 +422,12 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active) * we can use it to substitute for the pending idle-barrer * request that we want to emit on the kernel_context. */ - __active_del_barrier(ref, node_from_active(active)); - return true; + return __active_del_barrier(ref, node_from_active(active)); } int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) { + u64 idx = i915_request_timeline(rq)->fence_context; struct dma_fence *fence = &rq->fence; struct i915_active_fence *active; int err; @@ -437,16 +437,19 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) if (err) return err; - active = active_instance(ref, i915_request_timeline(rq)->fence_context); - if (!active) { - err = -ENOMEM; - goto out; - } + do { + active = active_instance(ref, idx); + if (!active) { + err = -ENOMEM; + goto out; + } + + if (replace_barrier(ref, active)) { + RCU_INIT_POINTER(active->fence, NULL); + atomic_dec(&ref->count); + } + } while (unlikely(is_barrier(active))); - if (replace_barrier(ref, active)) { - RCU_INIT_POINTER(active->fence, NULL); - atomic_dec(&ref->count); - } if (!__i915_active_fence_set(active, fence)) __i915_active_acquire(ref); -- cgit From acec726473822bc6b585961f4ca2a11fa7f28341 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 3 Mar 2023 11:25:08 +0200 Subject: thunderbolt: Fix memory leak in margining Memory for the usb4->margining needs to be relased for the upstream port of the router as well, even though the debugfs directory gets released with the router device removal. Fix this. Fixes: d0f1e0c2a699 ("thunderbolt: Add support for receiver lane margining") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/debugfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 4339e706cc3a..f92ad71ef983 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -942,7 +942,8 @@ static void margining_port_remove(struct tb_port *port) snprintf(dir_name, sizeof(dir_name), "port%d", port->port); parent = debugfs_lookup(dir_name, port->sw->debugfs_dir); - debugfs_remove_recursive(debugfs_lookup("margining", parent)); + if (parent) + debugfs_remove_recursive(debugfs_lookup("margining", parent)); kfree(port->usb4->margining); port->usb4->margining = NULL; @@ -967,19 +968,18 @@ static void margining_switch_init(struct tb_switch *sw) static void margining_switch_remove(struct tb_switch *sw) { + struct tb_port *upstream, *downstream; struct tb_switch *parent_sw; - struct tb_port *downstream; u64 route = tb_route(sw); if (!route) return; - /* - * Upstream is removed with the router itself but we need to - * remove the downstream port margining directory. - */ + upstream = tb_upstream_port(sw); parent_sw = tb_switch_parent(sw); downstream = tb_port_at(route, parent_sw); + + margining_port_remove(upstream); margining_port_remove(downstream); } -- cgit From cd0c1e582b055dea615001b8bd8eccaf6f69f7ce Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Fri, 3 Mar 2023 00:17:24 +0200 Subject: thunderbolt: Add missing UNSET_INBOUND_SBTX for retimer access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to USB4 retimer specification, the process of firmware update sequence requires issuing a SET_INBOUND_SBTX port operation that later shall be followed by UNSET_INBOUND_SBTX port operation. This last step is not currently issued by the driver but it is necessary to make sure the retimers are put back to passthrough mode even during enumeration. If this step is missing the link may not come up properly after soft-reboot for example. For this reason issue UNSET_INBOUND_SBTX after SET_INBOUND_SBTX for enumeration and also when the NVM upgrade is run. Reported-by: Christian Schaubschläger Link: https://lore.kernel.org/linux-usb/b556f5ed-5ee8-9990-9910-afd60db93310@gmx.at/ Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 23 +++++++++++++++++++++-- drivers/thunderbolt/sb_regs.h | 1 + drivers/thunderbolt/tb.h | 1 + drivers/thunderbolt/usb4.c | 14 ++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 56008eb91e2e..9cc28197dbc4 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -187,6 +187,22 @@ static ssize_t nvm_authenticate_show(struct device *dev, return ret; } +static void tb_retimer_set_inbound_sbtx(struct tb_port *port) +{ + int i; + + for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++) + usb4_port_retimer_set_inbound_sbtx(port, i); +} + +static void tb_retimer_unset_inbound_sbtx(struct tb_port *port) +{ + int i; + + for (i = TB_MAX_RETIMER_INDEX; i >= 1; i--) + usb4_port_retimer_unset_inbound_sbtx(port, i); +} + static ssize_t nvm_authenticate_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -213,6 +229,7 @@ static ssize_t nvm_authenticate_store(struct device *dev, rt->auth_status = 0; if (val) { + tb_retimer_set_inbound_sbtx(rt->port); if (val == AUTHENTICATE_ONLY) { ret = tb_retimer_nvm_authenticate(rt, true); } else { @@ -232,6 +249,7 @@ static ssize_t nvm_authenticate_store(struct device *dev, } exit_unlock: + tb_retimer_unset_inbound_sbtx(rt->port); mutex_unlock(&rt->tb->lock); exit_rpm: pm_runtime_mark_last_busy(&rt->dev); @@ -440,8 +458,7 @@ int tb_retimer_scan(struct tb_port *port, bool add) * Enable sideband channel for each retimer. We can do this * regardless whether there is device connected or not. */ - for (i = 1; i <= TB_MAX_RETIMER_INDEX; i++) - usb4_port_retimer_set_inbound_sbtx(port, i); + tb_retimer_set_inbound_sbtx(port); /* * Before doing anything else, read the authentication status. @@ -464,6 +481,8 @@ int tb_retimer_scan(struct tb_port *port, bool add) break; } + tb_retimer_unset_inbound_sbtx(port); + if (!last_idx) return 0; diff --git a/drivers/thunderbolt/sb_regs.h b/drivers/thunderbolt/sb_regs.h index 5185cf3e4d97..f37a4320f10a 100644 --- a/drivers/thunderbolt/sb_regs.h +++ b/drivers/thunderbolt/sb_regs.h @@ -20,6 +20,7 @@ enum usb4_sb_opcode { USB4_SB_OPCODE_ROUTER_OFFLINE = 0x4e45534c, /* "LSEN" */ USB4_SB_OPCODE_ENUMERATE_RETIMERS = 0x4d554e45, /* "ENUM" */ USB4_SB_OPCODE_SET_INBOUND_SBTX = 0x5055534c, /* "LSUP" */ + USB4_SB_OPCODE_UNSET_INBOUND_SBTX = 0x50555355, /* "USUP" */ USB4_SB_OPCODE_QUERY_LAST_RETIMER = 0x5453414c, /* "LAST" */ USB4_SB_OPCODE_GET_NVM_SECTOR_SIZE = 0x53534e47, /* "GNSS" */ USB4_SB_OPCODE_NVM_SET_OFFSET = 0x53504f42, /* "BOPS" */ diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 64968c162ec7..b3cd13dc783b 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -1242,6 +1242,7 @@ int usb4_port_sw_margin(struct tb_port *port, unsigned int lanes, bool timing, int usb4_port_sw_margin_errors(struct tb_port *port, u32 *errors); int usb4_port_retimer_set_inbound_sbtx(struct tb_port *port, u8 index); +int usb4_port_retimer_unset_inbound_sbtx(struct tb_port *port, u8 index); int usb4_port_retimer_read(struct tb_port *port, u8 index, u8 reg, void *buf, u8 size); int usb4_port_retimer_write(struct tb_port *port, u8 index, u8 reg, diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 1e5e9c147a31..95ff02395822 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -1578,6 +1578,20 @@ int usb4_port_retimer_set_inbound_sbtx(struct tb_port *port, u8 index) 500); } +/** + * usb4_port_retimer_unset_inbound_sbtx() - Disable sideband channel transactions + * @port: USB4 port + * @index: Retimer index + * + * Disables sideband channel transations on SBTX. The reverse of + * usb4_port_retimer_set_inbound_sbtx(). + */ +int usb4_port_retimer_unset_inbound_sbtx(struct tb_port *port, u8 index) +{ + return usb4_port_retimer_op(port, index, + USB4_SB_OPCODE_UNSET_INBOUND_SBTX, 500); +} + /** * usb4_port_retimer_read() - Read from retimer sideband registers * @port: USB4 port -- cgit From d2d6ddf188f609861489d5d188d545856a3ed399 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 3 Feb 2023 15:55:41 +0200 Subject: thunderbolt: Call tb_check_quirks() after initializing adapters In order to apply quirks based on certain adapter types move call to tb_check_quirks() happen after the adapters are initialized. This should not affect the existing quirks. Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 3370e18ba05f..da373ac38285 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -2968,8 +2968,6 @@ int tb_switch_add(struct tb_switch *sw) dev_warn(&sw->dev, "reading DROM failed: %d\n", ret); tb_sw_dbg(sw, "uid: %#llx\n", sw->uid); - tb_check_quirks(sw); - ret = tb_switch_set_uuid(sw); if (ret) { dev_err(&sw->dev, "failed to set UUID\n"); @@ -2988,6 +2986,8 @@ int tb_switch_add(struct tb_switch *sw) } } + tb_check_quirks(sw); + tb_switch_default_link_ports(sw); ret = tb_switch_update_link_attributes(sw); -- cgit From f0a57dd33b3eadf540912cd130db727ea824d174 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Tue, 31 Jan 2023 13:04:52 +0200 Subject: thunderbolt: Limit USB3 bandwidth of certain Intel USB4 host routers Current Intel USB4 host routers have hardware limitation that the USB3 bandwidth cannot go higher than 16376 Mb/s. Work this around by adding a new quirk that limits the bandwidth for the affected host routers. Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/quirks.c | 31 +++++++++++++++++++++++++++++++ drivers/thunderbolt/tb.h | 3 +++ drivers/thunderbolt/usb4.c | 17 +++++++++++++++-- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/quirks.c b/drivers/thunderbolt/quirks.c index ae28a03fa890..1157b8869bcc 100644 --- a/drivers/thunderbolt/quirks.c +++ b/drivers/thunderbolt/quirks.c @@ -26,6 +26,19 @@ static void quirk_clx_disable(struct tb_switch *sw) tb_sw_dbg(sw, "disabling CL states\n"); } +static void quirk_usb3_maximum_bandwidth(struct tb_switch *sw) +{ + struct tb_port *port; + + tb_switch_for_each_port(sw, port) { + if (!tb_port_is_usb3_down(port)) + continue; + port->max_bw = 16376; + tb_port_dbg(port, "USB3 maximum bandwidth limited to %u Mb/s\n", + port->max_bw); + } +} + struct tb_quirk { u16 hw_vendor_id; u16 hw_device_id; @@ -43,6 +56,24 @@ static const struct tb_quirk tb_quirks[] = { * DP buffers. */ { 0x8087, 0x0b26, 0x0000, 0x0000, quirk_dp_credit_allocation }, + /* + * Limit the maximum USB3 bandwidth for the following Intel USB4 + * host routers due to a hardware issue. + */ + { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_ADL_NHI1, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_RPL_NHI1, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_MTL_M_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI0, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, + { 0x8087, PCI_DEVICE_ID_INTEL_MTL_P_NHI1, 0x0000, 0x0000, + quirk_usb3_maximum_bandwidth }, /* * CLx is not supported on AMD USB4 Yellow Carp and Pink Sardine platforms. */ diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index b3cd13dc783b..275ff5219a3a 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -272,6 +272,8 @@ struct tb_bandwidth_group { * @group: Bandwidth allocation group the adapter is assigned to. Only * used for DP IN adapters for now. * @group_list: The adapter is linked to the group's list of ports through this + * @max_bw: Maximum possible bandwidth through this adapter if set to + * non-zero. * * In USB4 terminology this structure represents an adapter (protocol or * lane adapter). @@ -299,6 +301,7 @@ struct tb_port { unsigned int dma_credits; struct tb_bandwidth_group *group; struct list_head group_list; + unsigned int max_bw; }; /** diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 95ff02395822..6e87cf993c68 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -1882,6 +1882,15 @@ int usb4_port_retimer_nvm_read(struct tb_port *port, u8 index, usb4_port_retimer_nvm_read_block, &info); } +static inline unsigned int +usb4_usb3_port_max_bandwidth(const struct tb_port *port, unsigned int bw) +{ + /* Take the possible bandwidth limitation into account */ + if (port->max_bw) + return min(bw, port->max_bw); + return bw; +} + /** * usb4_usb3_port_max_link_rate() - Maximum support USB3 link rate * @port: USB3 adapter port @@ -1903,7 +1912,9 @@ int usb4_usb3_port_max_link_rate(struct tb_port *port) return ret; lr = (val & ADP_USB3_CS_4_MSLR_MASK) >> ADP_USB3_CS_4_MSLR_SHIFT; - return lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; + ret = lr == ADP_USB3_CS_4_MSLR_20G ? 20000 : 10000; + + return usb4_usb3_port_max_bandwidth(port, ret); } /** @@ -1930,7 +1941,9 @@ int usb4_usb3_port_actual_link_rate(struct tb_port *port) return 0; lr = val & ADP_USB3_CS_4_ALR_MASK; - return lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; + ret = lr == ADP_USB3_CS_4_ALR_20G ? 20000 : 10000; + + return usb4_usb3_port_max_bandwidth(port, ret); } static int usb4_usb3_port_cm_request(struct tb_port *port, bool request) -- cgit From c82510b1d87bdebfe916048857d2ef46f1778aa5 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 27 Dec 2022 11:55:26 +0200 Subject: thunderbolt: Use scale field when allocating USB3 bandwidth When tunneling aggregated USB3 (20 Gb/s) the bandwidth values that are programmed to the ADP_USB3_CS_2 go higher than 4096 and that does not fit anymore to the 12-bit field. Fix this by scaling the value using the scale field accordingly. Fixes: 3b1d8d577ca8 ("thunderbolt: Implement USB3 bandwidth negotiation routines") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/usb4.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 6e87cf993c68..a0996cb2893c 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -2094,18 +2094,30 @@ static int usb4_usb3_port_write_allocated_bandwidth(struct tb_port *port, int downstream_bw) { u32 val, ubw, dbw, scale; - int ret; + int ret, max_bw; - /* Read the used scale, hardware default is 0 */ - ret = tb_port_read(port, &scale, TB_CFG_PORT, - port->cap_adap + ADP_USB3_CS_3, 1); + /* Figure out suitable scale */ + scale = 0; + max_bw = max(upstream_bw, downstream_bw); + while (scale < 64) { + if (mbps_to_usb3_bw(max_bw, scale) < 4096) + break; + scale++; + } + + if (WARN_ON(scale >= 64)) + return -EINVAL; + + ret = tb_port_write(port, &scale, TB_CFG_PORT, + port->cap_adap + ADP_USB3_CS_3, 1); if (ret) return ret; - scale &= ADP_USB3_CS_3_SCALE_MASK; ubw = mbps_to_usb3_bw(upstream_bw, scale); dbw = mbps_to_usb3_bw(downstream_bw, scale); + tb_port_dbg(port, "scaled bandwidth %u/%u, scale %u\n", ubw, dbw, scale); + ret = tb_port_read(port, &val, TB_CFG_PORT, port->cap_adap + ADP_USB3_CS_2, 1); if (ret) -- cgit From d6fd48eff7506bb866a54e40369df8899f2078a9 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Feb 2023 11:01:42 +0100 Subject: virt/coco/sev-guest: Check SEV_SNP attribute at probe time No need to check it on every ioctl. And yes, this is a common SEV driver but it does only SNP-specific operations currently. This can be revisited later, when more use cases appear. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20230307192449.24732-3-bp@alien8.de --- arch/x86/kernel/sev.c | 3 --- drivers/virt/coco/sev-guest/sev-guest.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 679026a640ef..c644c34372e8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2183,9 +2183,6 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned struct ghcb *ghcb; int ret; - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) - return -ENODEV; - if (!fw_err) return -EINVAL; diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 7b4e9009f335..ed5d6ae1a144 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -703,6 +703,9 @@ static int __init sev_guest_probe(struct platform_device *pdev) void __iomem *mapping; int ret; + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return -ENODEV; + if (!dev->platform_data) return -ENODEV; -- cgit From 970ab823743fb54b42002ec76c51481f67436444 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Feb 2023 11:39:41 +0100 Subject: virt/coco/sev-guest: Simplify extended guest request handling Return a specific error code - -ENOSPC - to signal the too small cert data buffer instead of checking exit code and exitinfo2. While at it, hoist the *fw_err assignment in snp_issue_guest_request() so that a proper error value is returned to the callers. [ Tom: check override_err instead of err. ] Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230307192449.24732-4-bp@alien8.de --- arch/x86/kernel/sev.c | 11 ++++---- drivers/virt/coco/sev-guest/sev-guest.c | 48 ++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index c644c34372e8..6a3e1425ba17 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2209,15 +2209,16 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned if (ret) goto e_put; + *fw_err = ghcb->save.sw_exit_info_2; if (ghcb->save.sw_exit_info_2) { /* Number of expected pages are returned in RBX */ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && - ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN) + ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN) { input->data_npages = ghcb_get_rbx(ghcb); - - *fw_err = ghcb->save.sw_exit_info_2; - - ret = -EIO; + ret = -ENOSPC; + } else { + ret = -EIO; + } } e_put: diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index ed5d6ae1a144..e61db0b15b7a 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -322,7 +322,8 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in u8 type, void *req_buf, size_t req_sz, void *resp_buf, u32 resp_sz, __u64 *fw_err) { - unsigned long err; + unsigned long err, override_err = 0; + unsigned int override_npages = 0; u64 seqno; int rc; @@ -338,6 +339,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (rc) return rc; +retry_request: /* * Call firmware to process the request. In this function the encrypted * message enters shared memory with the host. So after this call the @@ -346,17 +348,24 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in */ rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); - /* - * If the extended guest request fails due to having too small of a - * certificate data buffer, retry the same guest request without the - * extended data request in order to increment the sequence number - * and thus avoid IV reuse. - */ - if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && - err == SNP_GUEST_REQ_INVALID_LEN) { - const unsigned int certs_npages = snp_dev->input.data_npages; + switch (rc) { + case -ENOSPC: + /* + * If the extended guest request fails due to having too + * small of a certificate data buffer, retry the same + * guest request without the extended data request in + * order to increment the sequence number and thus avoid + * IV reuse. + */ + override_npages = snp_dev->input.data_npages; + exit_code = SVM_VMGEXIT_GUEST_REQUEST; - exit_code = SVM_VMGEXIT_GUEST_REQUEST; + /* + * Override the error to inform callers the given extended + * request buffer size was too small and give the caller the + * required buffer size. + */ + override_err = SNP_GUEST_REQ_INVALID_LEN; /* * If this call to the firmware succeeds, the sequence number can @@ -366,15 +375,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in * of the VMPCK and the error code being propagated back to the * user as an ioctl() return code. */ - rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); - - /* - * Override the error to inform callers the given extended - * request buffer size was too small and give the caller the - * required buffer size. - */ - err = SNP_GUEST_REQ_INVALID_LEN; - snp_dev->input.data_npages = certs_npages; + goto retry_request; } /* @@ -386,7 +387,10 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in snp_inc_msg_seqno(snp_dev); if (fw_err) - *fw_err = err; + *fw_err = override_err ?: err; + + if (override_npages) + snp_dev->input.data_npages = override_npages; /* * If an extended guest request was issued and the supplied certificate @@ -394,7 +398,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in * prevent IV reuse. If the standard request was successful, return -EIO * back to the caller as would have originally been returned. */ - if (!rc && err == SNP_GUEST_REQ_INVALID_LEN) + if (!rc && override_err == SNP_GUEST_REQ_INVALID_LEN) return -EIO; if (rc) { -- cgit From c5a338274bdb894f088767bea856be344d0ccaef Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Feb 2023 11:43:43 +0100 Subject: virt/coco/sev-guest: Remove the disable_vmpck label in handle_guest_request() Call the function directly instead. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20230307192449.24732-5-bp@alien8.de --- drivers/virt/coco/sev-guest/sev-guest.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index e61db0b15b7a..a51bd4afd5ab 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -405,7 +405,8 @@ retry_request: dev_alert(snp_dev->dev, "Detected error from ASP request. rc: %d, fw_err: %llu\n", rc, *fw_err); - goto disable_vmpck; + snp_disable_vmpck(snp_dev); + return rc; } rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz); @@ -413,14 +414,11 @@ retry_request: dev_alert(snp_dev->dev, "Detected unexpected decode failure from ASP. rc: %d\n", rc); - goto disable_vmpck; + snp_disable_vmpck(snp_dev); + return rc; } return 0; - -disable_vmpck: - snp_disable_vmpck(snp_dev); - return rc; } static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) -- cgit From 0fdb6cc7c89cb5e0cbc45dbdbafb8e3fb92ddc95 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 7 Mar 2023 09:19:19 -0600 Subject: virt/coco/sev-guest: Carve out the request issuing logic into a helper This makes the code flow a lot easier to follow. No functional changes. [ Tom: touchups. ] Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230307192449.24732-6-bp@alien8.de --- drivers/virt/coco/sev-guest/sev-guest.c | 44 ++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index a51bd4afd5ab..07dafe22b27a 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -318,27 +318,12 @@ static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 return __enc_payload(snp_dev, req, payload, sz); } -static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver, - u8 type, void *req_buf, size_t req_sz, void *resp_buf, - u32 resp_sz, __u64 *fw_err) +static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, __u64 *fw_err) { unsigned long err, override_err = 0; unsigned int override_npages = 0; - u64 seqno; int rc; - /* Get message sequence and verify that its a non-zero */ - seqno = snp_get_msg_seqno(snp_dev); - if (!seqno) - return -EIO; - - memset(snp_dev->response, 0, sizeof(struct snp_guest_msg)); - - /* Encrypt the userspace provided payload */ - rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz); - if (rc) - return rc; - retry_request: /* * Call firmware to process the request. In this function the encrypted @@ -347,7 +332,6 @@ retry_request: * prevent reuse of the IV. */ rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); - switch (rc) { case -ENOSPC: /* @@ -401,7 +385,33 @@ retry_request: if (!rc && override_err == SNP_GUEST_REQ_INVALID_LEN) return -EIO; + return rc; +} + +static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver, + u8 type, void *req_buf, size_t req_sz, void *resp_buf, + u32 resp_sz, __u64 *fw_err) +{ + u64 seqno; + int rc; + + /* Get message sequence and verify that its a non-zero */ + seqno = snp_get_msg_seqno(snp_dev); + if (!seqno) + return -EIO; + + memset(snp_dev->response, 0, sizeof(struct snp_guest_msg)); + + /* Encrypt the userspace provided payload */ + rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz); + if (rc) + return rc; + + rc = __handle_guest_request(snp_dev, exit_code, fw_err); if (rc) { + if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN) + return rc; + dev_alert(snp_dev->dev, "Detected error from ASP request. rc: %d, fw_err: %llu\n", rc, *fw_err); -- cgit From 2cde14b187b458b7dcf99360fe4435dae1645a1f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 1 Mar 2023 17:25:08 +0100 Subject: accel: Build sub-directories based on config options When accel drivers are disabled do not process into sub-directories and create built-in archives: AR drivers/accel/habanalabs/built-in.a AR drivers/accel/ivpu/built-in.a Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU") Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20230301162508.3963484-1-stanislaw.gruszka@linux.intel.com (cherry picked from commit dd61bbd0d1fba48cd9464e047a7f90b70a463e39) Signed-off-by: Jacek Lawrynowicz --- drivers/accel/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile index 07aa77aed1c8..f22fd44d586b 100644 --- a/drivers/accel/Makefile +++ b/drivers/accel/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += habanalabs/ -obj-y += ivpu/ +obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/ +obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/ -- cgit From d25bae7dc7b0668cb2a1325c64eb32d5fea4e5a9 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Feb 2023 11:54:59 +0100 Subject: virt/coco/sev-guest: Do some code style cleanups Remove unnecessary linebreaks, make the code more compact. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20230307192449.24732-7-bp@alien8.de --- drivers/virt/coco/sev-guest/sev-guest.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 07dafe22b27a..81a53c31ff46 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -412,18 +412,14 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN) return rc; - dev_alert(snp_dev->dev, - "Detected error from ASP request. rc: %d, fw_err: %llu\n", - rc, *fw_err); + dev_alert(snp_dev->dev, "Detected error from ASP request. rc: %d, fw_err: %llu\n", rc, *fw_err); snp_disable_vmpck(snp_dev); return rc; } rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz); if (rc) { - dev_alert(snp_dev->dev, - "Detected unexpected decode failure from ASP. rc: %d\n", - rc); + dev_alert(snp_dev->dev, "Detected unexpected decode failure from ASP. rc: %d\n", rc); snp_disable_vmpck(snp_dev); return rc; } -- cgit From fa4ae42cc60a7dea30e8f2db444b808d80862345 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 16 Feb 2023 10:50:11 +0100 Subject: virt/coco/sev-guest: Convert the sw_exit_info_2 checking to a switch-case snp_issue_guest_request() checks the value returned by the hypervisor in sw_exit_info_2 and returns a different error depending on it. Convert those checks into a switch-case to make it more readable when more error values are going to be checked in the future. No functional changes. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20230307192449.24732-8-bp@alien8.de --- arch/x86/kernel/sev.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 6a3e1425ba17..d67884fb38c1 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2210,15 +2210,21 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned goto e_put; *fw_err = ghcb->save.sw_exit_info_2; - if (ghcb->save.sw_exit_info_2) { + switch (*fw_err) { + case 0: + break; + + case SNP_GUEST_REQ_INVALID_LEN: /* Number of expected pages are returned in RBX */ - if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && - ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN) { + if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { input->data_npages = ghcb_get_rbx(ghcb); ret = -ENOSPC; - } else { - ret = -EIO; + break; } + fallthrough; + default: + ret = -EIO; + break; } e_put: -- cgit From 72f7754dcf31c87c92c0c353dcf747814cc5ce10 Mon Sep 17 00:00:00 2001 From: Dionna Glaze Date: Thu, 16 Feb 2023 11:08:02 +0100 Subject: virt/coco/sev-guest: Add throttling awareness A potentially malicious SEV guest can constantly hammer the hypervisor using this driver to send down requests and thus prevent or at least considerably hinder other guests from issuing requests to the secure processor which is a shared platform resource. Therefore, the host is permitted and encouraged to throttle such guest requests. Add the capability to handle the case when the hypervisor throttles excessive numbers of requests issued by the guest. Otherwise, the VM platform communication key will be disabled, preventing the guest from attesting itself. Realistically speaking, a well-behaved guest should not even care about throttling. During its lifetime, it would end up issuing a handful of requests which the hardware can easily handle. This is more to address the case of a malicious guest. Such guest should get throttled and if its VMPCK gets disabled, then that's its own wrongdoing and perhaps that guest even deserves it. To the implementation: the hypervisor signals with SNP_GUEST_REQ_ERR_BUSY that the guest requests should be throttled. That error code is returned in the upper 32-bit half of exitinfo2 and this is part of the GHCB spec v2. So the guest is given a throttling period of 1 minute in which it retries the request every 2 seconds. This is a good default but if it turns out to not pan out in practice, it can be tweaked later. For safety, since the encryption algorithm in GHCBv2 is AES_GCM, control must remain in the kernel to complete the request with the current sequence number. Returning without finishing the request allows the guest to make another request but with different message contents. This is IV reuse, and breaks cryptographic protections. [ bp: - Rewrite commit message and do a simplified version. - The stable tags are supposed to denote that a cleanup should go upfront before backporting this so that any future fixes to this can preserve the sanity of the backporter(s). ] Fixes: d5af44dde546 ("x86/sev: Provide support for SNP guest request NAEs") Signed-off-by: Dionna Glaze Co-developed-by: Borislav Petkov (AMD) Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Cc: # d6fd48eff750 ("virt/coco/sev-guest: Check SEV_SNP attribute at probe time") Cc: # 970ab823743f (" virt/coco/sev-guest: Simplify extended guest request handling") Cc: # c5a338274bdb ("virt/coco/sev-guest: Remove the disable_vmpck label in handle_guest_request()") Cc: # 0fdb6cc7c89c ("virt/coco/sev-guest: Carve out the request issuing logic into a helper") Cc: # d25bae7dc7b0 ("virt/coco/sev-guest: Do some code style cleanups") Cc: # fa4ae42cc60a ("virt/coco/sev-guest: Convert the sw_exit_info_2 checking to a switch-case") Link: https://lore.kernel.org/r/20230214164638.1189804-2-dionnaglaze@google.com --- arch/x86/include/asm/sev-common.h | 3 ++- arch/x86/kernel/sev.c | 4 ++++ drivers/virt/coco/sev-guest/sev-guest.c | 19 ++++++++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b8357d6ecd47..b63be696b776 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -128,8 +128,9 @@ struct snp_psc_desc { struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; } __packed; -/* Guest message request error code */ +/* Guest message request error codes */ #define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32) +#define SNP_GUEST_REQ_ERR_BUSY BIT_ULL(33) #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index d67884fb38c1..3f664ab277c4 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2214,6 +2214,10 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned case 0: break; + case SNP_GUEST_REQ_ERR_BUSY: + ret = -EAGAIN; + break; + case SNP_GUEST_REQ_INVALID_LEN: /* Number of expected pages are returned in RBX */ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 81a53c31ff46..46f1a8d558b0 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -31,6 +31,9 @@ #define AAD_LEN 48 #define MSG_HDR_VER 1 +#define SNP_REQ_MAX_RETRY_DURATION (60*HZ) +#define SNP_REQ_RETRY_DELAY (2*HZ) + struct snp_guest_crypto { struct crypto_aead *tfm; u8 *iv, *authtag; @@ -320,7 +323,8 @@ static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, __u64 *fw_err) { - unsigned long err, override_err = 0; + unsigned long err = 0xff, override_err = 0; + unsigned long req_start = jiffies; unsigned int override_npages = 0; int rc; @@ -360,6 +364,19 @@ retry_request: * user as an ioctl() return code. */ goto retry_request; + + /* + * The host may return SNP_GUEST_REQ_ERR_EBUSY if the request has been + * throttled. Retry in the driver to avoid returning and reusing the + * message sequence number on a different message. + */ + case -EAGAIN: + if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) { + rc = -ETIMEDOUT; + break; + } + schedule_timeout_killable(SNP_REQ_RETRY_DELAY); + goto retry_request; } /* -- cgit From 41130c32f3a18fcc930316da17f3a5f3bc326aa1 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 23 Feb 2023 00:10:25 +0100 Subject: wifi: mt76: do not run mt76_unregister_device() on unregistered hw Trying to probe a mt7921e pci card without firmware results in a successful probe where ieee80211_register_hw hasn't been called. When removing the driver, ieee802111_unregister_hw is called unconditionally leading to a kernel NULL pointer dereference. Fix the issue running mt76_unregister_device routine just for registered hw. Link: https://bugs.debian.org/1029116 Link: https://bugs.kali.org/view.php?id=8140 Reported-by: Stuart Hayhurst Fixes: 1c71e03afe4b ("mt76: mt7921: move mt7921_init_hw in a dedicated work") Tested-by: Helmut Grohne Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/be3457d82f4e44bb71a22b2b5db27b644a37b1e1.1677107277.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mac80211.c | 8 ++++++++ drivers/net/wireless/mediatek/mt76/mt76.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index b117e4467c87..34abf70f44af 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -539,6 +539,7 @@ int mt76_register_phy(struct mt76_phy *phy, bool vht, if (ret) return ret; + set_bit(MT76_STATE_REGISTERED, &phy->state); phy->dev->phys[phy->band_idx] = phy; return 0; @@ -549,6 +550,9 @@ void mt76_unregister_phy(struct mt76_phy *phy) { struct mt76_dev *dev = phy->dev; + if (!test_bit(MT76_STATE_REGISTERED, &phy->state)) + return; + if (IS_ENABLED(CONFIG_MT76_LEDS)) mt76_led_cleanup(phy); mt76_tx_status_check(dev, true); @@ -719,6 +723,7 @@ int mt76_register_device(struct mt76_dev *dev, bool vht, return ret; WARN_ON(mt76_worker_setup(hw, &dev->tx_worker, NULL, "tx")); + set_bit(MT76_STATE_REGISTERED, &phy->state); sched_set_fifo_low(dev->tx_worker.task); return 0; @@ -729,6 +734,9 @@ void mt76_unregister_device(struct mt76_dev *dev) { struct ieee80211_hw *hw = dev->hw; + if (!test_bit(MT76_STATE_REGISTERED, &dev->phy.state)) + return; + if (IS_ENABLED(CONFIG_MT76_LEDS)) mt76_led_cleanup(&dev->phy); mt76_tx_status_check(dev, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index ccca0162c8f8..183b0fc5a2d4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -402,6 +402,7 @@ struct mt76_tx_cb { enum { MT76_STATE_INITIALIZED, + MT76_STATE_REGISTERED, MT76_STATE_RUNNING, MT76_STATE_MCU_RUNNING, MT76_SCANNING, -- cgit From c2f73eacee3bf1df2cfe25e1f08a3cae98b1df3d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 1 Mar 2023 17:37:39 +0100 Subject: wifi: mt76: mt7915: add back 160MHz channel width support for MT7915 A number of users reported that this support was working fine before it got removed. Add it back, but leave out the unsupported 80+80 mode. Fixes: ac922bd60ace ("wifi: mt76: mt7915: remove BW160 and BW80+80 support") Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230301163739.52314-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 40 ++++++++++++++++++------ 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 1ab768feccaa..5e288116b1b0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -383,7 +383,6 @@ mt7915_init_wiphy(struct mt7915_phy *phy) ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD); ieee80211_hw_set(hw, SUPPORTS_MULTI_BSSID); ieee80211_hw_set(hw, WANT_MONITOR_VIF); - ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); hw->max_tx_fragments = 4; @@ -396,6 +395,9 @@ mt7915_init_wiphy(struct mt7915_phy *phy) } if (phy->mt76->cap.has_5ghz) { + struct ieee80211_sta_vht_cap *vht_cap; + + vht_cap = &phy->mt76->sband_5g.sband.vht_cap; phy->mt76->sband_5g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_MAX_AMSDU; @@ -403,19 +405,28 @@ mt7915_init_wiphy(struct mt7915_phy *phy) IEEE80211_HT_MPDU_DENSITY_4; if (is_mt7915(&dev->mt76)) { - phy->mt76->sband_5g.sband.vht_cap.cap |= + vht_cap->cap |= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + + if (!dev->dbdc_support) + vht_cap->cap |= + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ | + FIELD_PREP(IEEE80211_VHT_CAP_EXT_NSS_BW_MASK, 1); } else { - phy->mt76->sband_5g.sband.vht_cap.cap |= + vht_cap->cap |= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; /* mt7916 dbdc with 2g 2x2 bw40 and 5g 2x2 bw160c */ - phy->mt76->sband_5g.sband.vht_cap.cap |= + vht_cap->cap |= IEEE80211_VHT_CAP_SHORT_GI_160 | IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; } + + if (!is_mt7915(&dev->mt76) || !dev->dbdc_support) + ieee80211_hw_set(hw, SUPPORTS_VHT_EXT_NSS_BW); } mt76_set_stream_caps(phy->mt76, true); @@ -841,9 +852,13 @@ mt7915_set_stream_he_txbf_caps(struct mt7915_phy *phy, int sts = hweight8(phy->mt76->chainmask); u8 c, sts_160 = sts; - /* mt7915 doesn't support bw160 */ - if (is_mt7915(&dev->mt76)) - sts_160 = 0; + /* Can do 1/2 of STS in 160Mhz mode for mt7915 */ + if (is_mt7915(&dev->mt76)) { + if (!dev->dbdc_support) + sts_160 /= 2; + else + sts_160 = 0; + } #ifdef CONFIG_MAC80211_MESH if (vif == NL80211_IFTYPE_MESH_POINT) @@ -944,10 +959,15 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band, int i, idx = 0, nss = hweight8(phy->mt76->antenna_mask); u16 mcs_map = 0; u16 mcs_map_160 = 0; - u8 nss_160 = nss; + u8 nss_160; - /* Can't do 160MHz with mt7915 */ - if (is_mt7915(&dev->mt76)) + if (!is_mt7915(&dev->mt76)) + nss_160 = nss; + else if (!dev->dbdc_support) + /* Can do 1/2 of NSS streams in 160Mhz mode for mt7915 */ + nss_160 = nss / 2; + else + /* Can't do 160MHz with mt7915 dbdc */ nss_160 = 0; for (i = 0; i < 8; i++) { -- cgit From 5683e1488aa9b0805a9403d215e48fed29d6d923 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 6 Mar 2023 18:42:51 +0100 Subject: wifi: mt76: connac: do not check WED status for non-mmio devices WED is supported just for mmio devices, so do not check it for usb or sdio devices. This patch fixes the crash reported below: [ 21.946627] wlp0s3u1i3: authenticate with c4:41:1e:f5:2b:1d [ 22.525298] wlp0s3u1i3: send auth to c4:41:1e:f5:2b:1d (try 1/3) [ 22.548274] wlp0s3u1i3: authenticate with c4:41:1e:f5:2b:1d [ 22.557694] wlp0s3u1i3: send auth to c4:41:1e:f5:2b:1d (try 1/3) [ 22.565885] wlp0s3u1i3: authenticated [ 22.569502] wlp0s3u1i3: associate with c4:41:1e:f5:2b:1d (try 1/3) [ 22.578966] wlp0s3u1i3: RX AssocResp from c4:41:1e:f5:2b:1d (capab=0x11 status=30 aid=3) [ 22.579113] wlp0s3u1i3: c4:41:1e:f5:2b:1d rejected association temporarily; comeback duration 1000 TU (1024 ms) [ 23.649518] wlp0s3u1i3: associate with c4:41:1e:f5:2b:1d (try 2/3) [ 23.752528] wlp0s3u1i3: RX AssocResp from c4:41:1e:f5:2b:1d (capab=0x11 status=0 aid=3) [ 23.797450] wlp0s3u1i3: associated [ 24.959527] kernel tried to execute NX-protected page - exploit attempt? (uid: 0) [ 24.959640] BUG: unable to handle page fault for address: ffff88800c223200 [ 24.959706] #PF: supervisor instruction fetch in kernel mode [ 24.959788] #PF: error_code(0x0011) - permissions violation [ 24.959846] PGD 2c01067 P4D 2c01067 PUD 2c02067 PMD c2a8063 PTE 800000000c223163 [ 24.959957] Oops: 0011 [#1] PREEMPT SMP [ 24.960009] CPU: 0 PID: 391 Comm: wpa_supplicant Not tainted 6.2.0-kvm #18 [ 24.960089] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.1-2.fc37 04/01/2014 [ 24.960191] RIP: 0010:0xffff88800c223200 [ 24.960446] RSP: 0018:ffffc90000ff7698 EFLAGS: 00010282 [ 24.960513] RAX: ffff888028397010 RBX: ffff88800c26e630 RCX: 0000000000000058 [ 24.960598] RDX: ffff88800c26f844 RSI: 0000000000000006 RDI: ffff888028397010 [ 24.960682] RBP: ffff88800ea72f00 R08: 18b873fbab2b964c R09: be06b38235f3c63c [ 24.960766] R10: 18b873fbab2b964c R11: be06b38235f3c63c R12: 0000000000000001 [ 24.960853] R13: ffff88800c26f84c R14: ffff8880063f0ff8 R15: ffff88800c26e644 [ 24.960950] FS: 00007effcea327c0(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 [ 24.961036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 24.961106] CR2: ffff88800c223200 CR3: 000000000eaa2000 CR4: 00000000000006b0 [ 24.961190] Call Trace: [ 24.961219] [ 24.961245] ? mt76_connac_mcu_add_key+0x2cf/0x310 [ 24.961313] ? mt7921_set_key+0x150/0x200 [ 24.961365] ? drv_set_key+0xa9/0x1b0 [ 24.961418] ? ieee80211_key_enable_hw_accel+0xd9/0x240 [ 24.961485] ? ieee80211_key_replace+0x3f3/0x730 [ 24.961541] ? crypto_shash_setkey+0x89/0xd0 [ 24.961597] ? ieee80211_key_link+0x2d7/0x3a0 [ 24.961664] ? crypto_aead_setauthsize+0x31/0x50 [ 24.961730] ? sta_info_hash_lookup+0xa6/0xf0 [ 24.961785] ? ieee80211_add_key+0x1fc/0x250 [ 24.961842] ? rdev_add_key+0x41/0x140 [ 24.961882] ? nl80211_parse_key+0x6c/0x2f0 [ 24.961940] ? nl80211_new_key+0x24a/0x290 [ 24.961984] ? genl_rcv_msg+0x36c/0x3a0 [ 24.962036] ? rdev_mod_link_station+0xe0/0xe0 [ 24.962102] ? nl80211_set_key+0x410/0x410 [ 24.962143] ? nl80211_pre_doit+0x200/0x200 [ 24.962187] ? genl_bind+0xc0/0xc0 [ 24.962217] ? netlink_rcv_skb+0xaa/0xd0 [ 24.962259] ? genl_rcv+0x24/0x40 [ 24.962300] ? netlink_unicast+0x224/0x2f0 [ 24.962345] ? netlink_sendmsg+0x30b/0x3d0 [ 24.962388] ? ____sys_sendmsg+0x109/0x1b0 [ 24.962388] ? ____sys_sendmsg+0x109/0x1b0 [ 24.962440] ? __import_iovec+0x2e/0x110 [ 24.962482] ? ___sys_sendmsg+0xbe/0xe0 [ 24.962525] ? mod_objcg_state+0x25c/0x330 [ 24.962576] ? __dentry_kill+0x19e/0x1d0 [ 24.962618] ? call_rcu+0x18f/0x270 [ 24.962660] ? __dentry_kill+0x19e/0x1d0 [ 24.962702] ? __x64_sys_sendmsg+0x70/0x90 [ 24.962744] ? do_syscall_64+0x3d/0x80 [ 24.962796] ? exit_to_user_mode_prepare+0x1b/0x70 [ 24.962852] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 24.962913] [ 24.962939] Modules linked in: [ 24.962981] CR2: ffff88800c223200 [ 24.963022] ---[ end trace 0000000000000000 ]--- [ 24.963087] RIP: 0010:0xffff88800c223200 [ 24.963323] RSP: 0018:ffffc90000ff7698 EFLAGS: 00010282 [ 24.963376] RAX: ffff888028397010 RBX: ffff88800c26e630 RCX: 0000000000000058 [ 24.963458] RDX: ffff88800c26f844 RSI: 0000000000000006 RDI: ffff888028397010 [ 24.963538] RBP: ffff88800ea72f00 R08: 18b873fbab2b964c R09: be06b38235f3c63c [ 24.963622] R10: 18b873fbab2b964c R11: be06b38235f3c63c R12: 0000000000000001 [ 24.963705] R13: ffff88800c26f84c R14: ffff8880063f0ff8 R15: ffff88800c26e644 [ 24.963788] FS: 00007effcea327c0(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 [ 24.963871] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 24.963941] CR2: ffff88800c223200 CR3: 000000000eaa2000 CR4: 00000000000006b0 [ 24.964018] note: wpa_supplicant[391] exited with irqs disabled Fixes: d1369e515efe ("wifi: mt76: connac: introduce mt76_connac_mcu_sta_wed_update utility routine") Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/c42168429453474213fa8244bf4b069de4531f40.1678124335.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index efb9bfaa187f..008ece1b16f8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -1221,6 +1221,9 @@ EXPORT_SYMBOL_GPL(mt76_connac_mcu_sta_ba_tlv); int mt76_connac_mcu_sta_wed_update(struct mt76_dev *dev, struct sk_buff *skb) { + if (!mt76_is_mmio(dev)) + return 0; + if (!mtk_wed_device_active(&dev->mmio.wed)) return 0; -- cgit From 3e453522593d74a87cf68a38e14aa36ebca1dbcd Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 22 Feb 2023 11:59:16 +0800 Subject: md: Free resources in __md_stop If md_run() fails after ->active_io is initialized, then percpu_ref_exit is called in error path. However, later md_free_disk will call percpu_ref_exit again which leads to a panic because of null pointer dereference. It can also trigger this bug when resources are initialized but are freed in error path, then will be freed again in md_free_disk. BUG: kernel NULL pointer dereference, address: 0000000000000038 Oops: 0000 [#1] PREEMPT SMP Workqueue: md_misc mddev_delayed_delete RIP: 0010:free_percpu+0x110/0x630 Call Trace: __percpu_ref_exit+0x44/0x70 percpu_ref_exit+0x16/0x90 md_free_disk+0x2f/0x80 disk_release+0x101/0x180 device_release+0x84/0x110 kobject_put+0x12a/0x380 kobject_put+0x160/0x380 mddev_delayed_delete+0x19/0x30 process_one_work+0x269/0x680 worker_thread+0x266/0x640 kthread+0x151/0x1b0 ret_from_fork+0x1f/0x30 For creating raid device, md raid calls do_md_run->md_run, dm raid calls md_run. We alloc those memory in md_run. For stopping raid device, md raid calls do_md_stop->__md_stop, dm raid calls md_stop->__md_stop. So we can free those memory resources in __md_stop. Fixes: 72adae23a72c ("md: Change active_io to percpu") Reported-and-tested-by: Yu Kuai Signed-off-by: Xiao Ni Signed-off-by: Song Liu --- drivers/md/md.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 927a43db5dfb..f5480778e2f7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6256,6 +6256,11 @@ static void __md_stop(struct mddev *mddev) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + percpu_ref_exit(&mddev->writes_pending); + percpu_ref_exit(&mddev->active_io); + bioset_exit(&mddev->bio_set); + bioset_exit(&mddev->sync_set); } void md_stop(struct mddev *mddev) @@ -6265,10 +6270,6 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); - percpu_ref_exit(&mddev->writes_pending); - percpu_ref_exit(&mddev->active_io); - bioset_exit(&mddev->bio_set); - bioset_exit(&mddev->sync_set); } EXPORT_SYMBOL_GPL(md_stop); @@ -7839,11 +7840,6 @@ static void md_free_disk(struct gendisk *disk) { struct mddev *mddev = disk->private_data; - percpu_ref_exit(&mddev->writes_pending); - percpu_ref_exit(&mddev->active_io); - bioset_exit(&mddev->bio_set); - bioset_exit(&mddev->sync_set); - mddev_free(mddev); } -- cgit From ee06a3ef7e3cddb62b90ac40aa661d3c12f7cabc Mon Sep 17 00:00:00 2001 From: Jurica Vukadin Date: Tue, 7 Mar 2023 20:40:39 +0100 Subject: kconfig: Update config changed flag before calling callback Prior to commit 5ee546594025 ("kconfig: change sym_change_count to a boolean flag"), the conf_updated flag was set to the new value *before* calling the callback. xconfig's save action depends on this behaviour, because xconfig calls conf_get_changed() directly from the callback and now sees the old value, thus never enabling the save button or the shortcut. Restore the previous behaviour. Fixes: 5ee546594025 ("kconfig: change sym_change_count to a boolean flag") Signed-off-by: Jurica Vukadin Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/kconfig/confdata.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index b7c9f1dd5e42..992575f1e976 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -1226,10 +1226,12 @@ static void (*conf_changed_callback)(void); void conf_set_changed(bool val) { - if (conf_changed_callback && conf_changed != val) - conf_changed_callback(); + bool changed = conf_changed != val; conf_changed = val; + + if (conf_changed_callback && changed) + conf_changed_callback(); } bool conf_get_changed(void) -- cgit From 4928f67bc911e46a43004251a4d7eb2259ba6077 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 8 Mar 2023 17:57:23 +0200 Subject: vfio/mlx5: Fix the report of dirty_bytes upon pre-copy Fix the report of dirty_bytes upon pre-copy to include both the existing data on the migration file and the device extra bytes. This gives a better close estimation to what can be passed any more as part of pre-copy. Fixes: 0dce165b1adf ("vfio/mlx5: Introduce vfio precopy ioctl implementation") Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20230308155723.108218-1-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index e897537a9e8a..d95fd382814c 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -442,16 +442,10 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, if (migf->pre_copy_initial_bytes > *pos) { info.initial_bytes = migf->pre_copy_initial_bytes - *pos; } else { - buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); - if (buf) { - info.dirty_bytes = buf->start_pos + buf->length - *pos; - } else { - if (!end_of_data) { - ret = -EINVAL; - goto err_migf_unlock; - } - info.dirty_bytes = inc_length; - } + info.dirty_bytes = migf->max_pos - *pos; + if (!info.dirty_bytes) + end_of_data = true; + info.dirty_bytes += inc_length; } if (!end_of_data || !inc_length) { -- cgit From 90ae93d8affc1061cd87ca8ddd9a838c7d31a158 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:36 +0100 Subject: interconnect: qcom: rpm: fix registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: 62feb14ee8a3 ("interconnect: qcom: Consolidate interconnect RPM support") Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver") Cc: stable@vger.kernel.org # 5.7 Reviewed-by: Konrad Dybcio Reviewed-by: Jun Nie Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-9-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpm.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c index 91778cfcbc65..4180a06681b2 100644 --- a/drivers/interconnect/qcom/icc-rpm.c +++ b/drivers/interconnect/qcom/icc-rpm.c @@ -503,7 +503,6 @@ regmap_done: } provider = &qp->provider; - INIT_LIST_HEAD(&provider->nodes); provider->dev = dev; provider->set = qcom_icc_set; provider->pre_aggregate = qcom_icc_pre_bw_aggregate; @@ -511,12 +510,7 @@ regmap_done: provider->xlate_extended = qcom_icc_xlate_extended; provider->data = data; - ret = icc_provider_add(provider); - if (ret) { - dev_err(dev, "error adding interconnect provider: %d\n", ret); - clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks); - return ret; - } + icc_provider_init(provider); for (i = 0; i < num_nodes; i++) { size_t j; @@ -524,7 +518,7 @@ regmap_done: node = icc_node_create(qnodes[i]->id); if (IS_ERR(node)) { ret = PTR_ERR(node); - goto err; + goto err_remove_nodes; } node->name = qnodes[i]->name; @@ -538,20 +532,26 @@ regmap_done: } data->num_nodes = num_nodes; + ret = icc_provider_register(provider); + if (ret) + goto err_remove_nodes; + platform_set_drvdata(pdev, qp); /* Populate child NoC devices if any */ if (of_get_child_count(dev->of_node) > 0) { ret = of_platform_populate(dev->of_node, NULL, NULL, dev); if (ret) - goto err; + goto err_deregister_provider; } return 0; -err: + +err_deregister_provider: + icc_provider_deregister(provider); +err_remove_nodes: icc_nodes_remove(provider); clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks); - icc_provider_del(provider); return ret; } @@ -561,9 +561,9 @@ int qnoc_remove(struct platform_device *pdev) { struct qcom_icc_provider *qp = platform_get_drvdata(pdev); + icc_provider_deregister(&qp->provider); icc_nodes_remove(&qp->provider); clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks); - icc_provider_del(&qp->provider); return 0; } -- cgit From 6570d1d46eeade82965ccc4a3ab7d778898ef4bf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:37 +0100 Subject: interconnect: qcom: rpmh: fix probe child-node error handling Make sure to clean up and release resources properly also in case probe fails when populating child devices. Fixes: 57eb14779dfd ("interconnect: qcom: icc-rpmh: Support child NoC device probe") Cc: stable@vger.kernel.org # 6.0 Cc: Luca Weiss Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-10-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index fd17291c61eb..5168bbf3d92f 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -235,8 +235,11 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev) platform_set_drvdata(pdev, qp); /* Populate child NoC devices if any */ - if (of_get_child_count(dev->of_node) > 0) - return of_platform_populate(dev->of_node, NULL, NULL, dev); + if (of_get_child_count(dev->of_node) > 0) { + ret = of_platform_populate(dev->of_node, NULL, NULL, dev); + if (ret) + goto err; + } return 0; err: -- cgit From 74240a5bebd48d8b843c6d0f1acfaa722a5abeb7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:38 +0100 Subject: interconnect: qcom: rpmh: fix registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Cc: stable@vger.kernel.org # 5.7 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-11-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index 5168bbf3d92f..fdb5e58e408b 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -192,9 +192,10 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev) provider->pre_aggregate = qcom_icc_pre_aggregate; provider->aggregate = qcom_icc_aggregate; provider->xlate_extended = qcom_icc_xlate_extended; - INIT_LIST_HEAD(&provider->nodes); provider->data = data; + icc_provider_init(provider); + qp->dev = dev; qp->bcms = desc->bcms; qp->num_bcms = desc->num_bcms; @@ -203,10 +204,6 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev) if (IS_ERR(qp->voter)) return PTR_ERR(qp->voter); - ret = icc_provider_add(provider); - if (ret) - return ret; - for (i = 0; i < qp->num_bcms; i++) qcom_icc_bcm_init(qp->bcms[i], dev); @@ -218,7 +215,7 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev) node = icc_node_create(qn->id); if (IS_ERR(node)) { ret = PTR_ERR(node); - goto err; + goto err_remove_nodes; } node->name = qn->name; @@ -232,19 +229,27 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev) } data->num_nodes = num_nodes; + + ret = icc_provider_register(provider); + if (ret) + goto err_remove_nodes; + platform_set_drvdata(pdev, qp); /* Populate child NoC devices if any */ if (of_get_child_count(dev->of_node) > 0) { ret = of_platform_populate(dev->of_node, NULL, NULL, dev); if (ret) - goto err; + goto err_deregister_provider; } return 0; -err: + +err_deregister_provider: + icc_provider_deregister(provider); +err_remove_nodes: icc_nodes_remove(provider); - icc_provider_del(provider); + return ret; } EXPORT_SYMBOL_GPL(qcom_icc_rpmh_probe); @@ -253,8 +258,8 @@ int qcom_icc_rpmh_remove(struct platform_device *pdev) { struct qcom_icc_provider *qp = platform_get_drvdata(pdev); + icc_provider_deregister(&qp->provider); icc_nodes_remove(&qp->provider); - icc_provider_del(&qp->provider); return 0; } -- cgit From bfe7bcd2b9f5215de2144f097f39971180e7ea54 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:39 +0100 Subject: interconnect: qcom: msm8974: fix registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: 4e60a9568dc6 ("interconnect: qcom: add msm8974 driver") Cc: stable@vger.kernel.org # 5.5 Reviewed-by: Brian Masney Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-12-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/msm8974.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/interconnect/qcom/msm8974.c b/drivers/interconnect/qcom/msm8974.c index 5ea192f1141d..1828deaca443 100644 --- a/drivers/interconnect/qcom/msm8974.c +++ b/drivers/interconnect/qcom/msm8974.c @@ -692,7 +692,6 @@ static int msm8974_icc_probe(struct platform_device *pdev) return ret; provider = &qp->provider; - INIT_LIST_HEAD(&provider->nodes); provider->dev = dev; provider->set = msm8974_icc_set; provider->aggregate = icc_std_aggregate; @@ -700,11 +699,7 @@ static int msm8974_icc_probe(struct platform_device *pdev) provider->data = data; provider->get_bw = msm8974_get_bw; - ret = icc_provider_add(provider); - if (ret) { - dev_err(dev, "error adding interconnect provider: %d\n", ret); - goto err_disable_clks; - } + icc_provider_init(provider); for (i = 0; i < num_nodes; i++) { size_t j; @@ -712,7 +707,7 @@ static int msm8974_icc_probe(struct platform_device *pdev) node = icc_node_create(qnodes[i]->id); if (IS_ERR(node)) { ret = PTR_ERR(node); - goto err_del_icc; + goto err_remove_nodes; } node->name = qnodes[i]->name; @@ -729,15 +724,16 @@ static int msm8974_icc_probe(struct platform_device *pdev) } data->num_nodes = num_nodes; + ret = icc_provider_register(provider); + if (ret) + goto err_remove_nodes; + platform_set_drvdata(pdev, qp); return 0; -err_del_icc: +err_remove_nodes: icc_nodes_remove(provider); - icc_provider_del(provider); - -err_disable_clks: clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks); return ret; @@ -747,9 +743,9 @@ static int msm8974_icc_remove(struct platform_device *pdev) { struct msm8974_icc_provider *qp = platform_get_drvdata(pdev); + icc_provider_deregister(&qp->provider); icc_nodes_remove(&qp->provider); clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks); - icc_provider_del(&qp->provider); return 0; } -- cgit From 3aab264875bf3c915ea2517fae1eec213e0b4987 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:42 +0100 Subject: interconnect: exynos: fix node leak in probe PM QoS error path Make sure to add the newly allocated interconnect node to the provider before adding the PM QoS request so that the node is freed on errors. Fixes: 2f95b9d5cf0b ("interconnect: Add generic interconnect driver for Exynos SoCs") Cc: stable@vger.kernel.org # 5.11 Cc: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-15-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/samsung/exynos.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c index 6559d8cf8068..e70665899482 100644 --- a/drivers/interconnect/samsung/exynos.c +++ b/drivers/interconnect/samsung/exynos.c @@ -149,6 +149,9 @@ static int exynos_generic_icc_probe(struct platform_device *pdev) &priv->bus_clk_ratio)) priv->bus_clk_ratio = EXYNOS_ICC_DEFAULT_BUS_CLK_RATIO; + icc_node->data = priv; + icc_node_add(icc_node, provider); + /* * Register a PM QoS request for the parent (devfreq) device. */ @@ -157,9 +160,6 @@ static int exynos_generic_icc_probe(struct platform_device *pdev) if (ret < 0) goto err_node_del; - icc_node->data = priv; - icc_node_add(icc_node, provider); - icc_parent_node = exynos_icc_get_parent(bus_dev->of_node); if (IS_ERR(icc_parent_node)) { ret = PTR_ERR(icc_parent_node); -- cgit From c9e46ca612cfbb0cf890f7ae7389b742e90efe64 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:43 +0100 Subject: interconnect: exynos: fix registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to trigger a NULL-pointer deference when either a NULL pointer or not fully initialised node is returned from exynos_generic_icc_xlate(). Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: 2f95b9d5cf0b ("interconnect: Add generic interconnect driver for Exynos SoCs") Cc: stable@vger.kernel.org # 5.11 Cc: Sylwester Nawrocki Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-16-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/samsung/exynos.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c index e70665899482..72e42603823b 100644 --- a/drivers/interconnect/samsung/exynos.c +++ b/drivers/interconnect/samsung/exynos.c @@ -98,12 +98,13 @@ static int exynos_generic_icc_remove(struct platform_device *pdev) struct exynos_icc_priv *priv = platform_get_drvdata(pdev); struct icc_node *parent_node, *node = priv->node; + icc_provider_deregister(&priv->provider); + parent_node = exynos_icc_get_parent(priv->dev->parent->of_node); if (parent_node && !IS_ERR(parent_node)) icc_link_destroy(node, parent_node); icc_nodes_remove(&priv->provider); - icc_provider_del(&priv->provider); return 0; } @@ -132,15 +133,11 @@ static int exynos_generic_icc_probe(struct platform_device *pdev) provider->inter_set = true; provider->data = priv; - ret = icc_provider_add(provider); - if (ret < 0) - return ret; + icc_provider_init(provider); icc_node = icc_node_create(pdev->id); - if (IS_ERR(icc_node)) { - ret = PTR_ERR(icc_node); - goto err_prov_del; - } + if (IS_ERR(icc_node)) + return PTR_ERR(icc_node); priv->node = icc_node; icc_node->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%pOFn", @@ -171,14 +168,17 @@ static int exynos_generic_icc_probe(struct platform_device *pdev) goto err_pmqos_del; } + ret = icc_provider_register(provider); + if (ret < 0) + goto err_pmqos_del; + return 0; err_pmqos_del: dev_pm_qos_remove_request(&priv->qos_req); err_node_del: icc_nodes_remove(provider); -err_prov_del: - icc_provider_del(provider); + return ret; } -- cgit From 859ad5f177efa59f6b8a2fac20561ca5cb13c89f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:44 +0100 Subject: interconnect: exynos: drop redundant link destroy There is no longer any need to explicitly destroy node links as this is now done when the node is destroyed as part of icc_nodes_remove(). Reviewed-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-17-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/samsung/exynos.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c index 72e42603823b..ebf09bbf725b 100644 --- a/drivers/interconnect/samsung/exynos.c +++ b/drivers/interconnect/samsung/exynos.c @@ -96,14 +96,8 @@ static struct icc_node *exynos_generic_icc_xlate(struct of_phandle_args *spec, static int exynos_generic_icc_remove(struct platform_device *pdev) { struct exynos_icc_priv *priv = platform_get_drvdata(pdev); - struct icc_node *parent_node, *node = priv->node; icc_provider_deregister(&priv->provider); - - parent_node = exynos_icc_get_parent(priv->dev->parent->of_node); - if (parent_node && !IS_ERR(parent_node)) - icc_link_destroy(node, parent_node); - icc_nodes_remove(&priv->provider); return 0; -- cgit From 5553055c62683ce339f9ef5fb2a26c8331485d68 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:45 +0100 Subject: memory: tegra: fix interconnect registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: 06f079816d4c ("memory: tegra-mc: Add interconnect framework") Cc: stable@vger.kernel.org # 5.11 Cc: Dmitry Osipenko Acked-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-18-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/memory/tegra/mc.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 592907546ee6..5cd28619ea9f 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -794,16 +794,12 @@ static int tegra_mc_interconnect_setup(struct tegra_mc *mc) mc->provider.aggregate = mc->soc->icc_ops->aggregate; mc->provider.xlate_extended = mc->soc->icc_ops->xlate_extended; - err = icc_provider_add(&mc->provider); - if (err) - return err; + icc_provider_init(&mc->provider); /* create Memory Controller node */ node = icc_node_create(TEGRA_ICC_MC); - if (IS_ERR(node)) { - err = PTR_ERR(node); - goto del_provider; - } + if (IS_ERR(node)) + return PTR_ERR(node); node->name = "Memory Controller"; icc_node_add(node, &mc->provider); @@ -830,12 +826,14 @@ static int tegra_mc_interconnect_setup(struct tegra_mc *mc) goto remove_nodes; } + err = icc_provider_register(&mc->provider); + if (err) + goto remove_nodes; + return 0; remove_nodes: icc_nodes_remove(&mc->provider); -del_provider: - icc_provider_del(&mc->provider); return err; } -- cgit From abd9f1b49cf25eebeaba193c7707355be3f48dae Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:46 +0100 Subject: memory: tegra124-emc: fix interconnect registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: 380def2d4cf2 ("memory: tegra124: Support interconnect framework") Cc: stable@vger.kernel.org # 5.12 Cc: Dmitry Osipenko Acked-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-19-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/memory/tegra/tegra124-emc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/memory/tegra/tegra124-emc.c b/drivers/memory/tegra/tegra124-emc.c index 85bc936c02f9..00ed2b6a0d1b 100644 --- a/drivers/memory/tegra/tegra124-emc.c +++ b/drivers/memory/tegra/tegra124-emc.c @@ -1351,15 +1351,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc) emc->provider.aggregate = soc->icc_ops->aggregate; emc->provider.xlate_extended = emc_of_icc_xlate_extended; - err = icc_provider_add(&emc->provider); - if (err) - goto err_msg; + icc_provider_init(&emc->provider); /* create External Memory Controller node */ node = icc_node_create(TEGRA_ICC_EMC); if (IS_ERR(node)) { err = PTR_ERR(node); - goto del_provider; + goto err_msg; } node->name = "External Memory Controller"; @@ -1380,12 +1378,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc) node->name = "External Memory (DRAM)"; icc_node_add(node, &emc->provider); + err = icc_provider_register(&emc->provider); + if (err) + goto remove_nodes; + return 0; remove_nodes: icc_nodes_remove(&emc->provider); -del_provider: - icc_provider_del(&emc->provider); err_msg: dev_err(emc->dev, "failed to initialize ICC: %d\n", err); -- cgit From c5587f61ec050f7e9ebb3e2da29d12af63e833d3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:47 +0100 Subject: memory: tegra20-emc: fix interconnect registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: d5ef16ba5fbe ("memory: tegra20: Support interconnect framework") Cc: stable@vger.kernel.org # 5.11 Cc: Dmitry Osipenko Acked-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-20-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/memory/tegra/tegra20-emc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c index bd4e37b6552d..fd595c851a27 100644 --- a/drivers/memory/tegra/tegra20-emc.c +++ b/drivers/memory/tegra/tegra20-emc.c @@ -1021,15 +1021,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc) emc->provider.aggregate = soc->icc_ops->aggregate; emc->provider.xlate_extended = emc_of_icc_xlate_extended; - err = icc_provider_add(&emc->provider); - if (err) - goto err_msg; + icc_provider_init(&emc->provider); /* create External Memory Controller node */ node = icc_node_create(TEGRA_ICC_EMC); if (IS_ERR(node)) { err = PTR_ERR(node); - goto del_provider; + goto err_msg; } node->name = "External Memory Controller"; @@ -1050,12 +1048,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc) node->name = "External Memory (DRAM)"; icc_node_add(node, &emc->provider); + err = icc_provider_register(&emc->provider); + if (err) + goto remove_nodes; + return 0; remove_nodes: icc_nodes_remove(&emc->provider); -del_provider: - icc_provider_del(&emc->provider); err_msg: dev_err(emc->dev, "failed to initialize ICC: %d\n", err); -- cgit From 9db481c909dd6312ccfbdc7e343b50e41c727483 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:48 +0100 Subject: memory: tegra30-emc: fix interconnect registration race The current interconnect provider registration interface is inherently racy as nodes are not added until the after adding the provider. This can specifically cause racing DT lookups to fail. Switch to using the new API where the provider is not registered until after it has been fully initialised. Fixes: d5ef16ba5fbe ("memory: tegra20: Support interconnect framework") Cc: stable@vger.kernel.org # 5.11 Cc: Dmitry Osipenko Acked-by: Krzysztof Kozlowski Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230306075651.2449-21-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- drivers/memory/tegra/tegra30-emc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/memory/tegra/tegra30-emc.c b/drivers/memory/tegra/tegra30-emc.c index 77706e9bc543..c91e9b7e2e01 100644 --- a/drivers/memory/tegra/tegra30-emc.c +++ b/drivers/memory/tegra/tegra30-emc.c @@ -1533,15 +1533,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc) emc->provider.aggregate = soc->icc_ops->aggregate; emc->provider.xlate_extended = emc_of_icc_xlate_extended; - err = icc_provider_add(&emc->provider); - if (err) - goto err_msg; + icc_provider_init(&emc->provider); /* create External Memory Controller node */ node = icc_node_create(TEGRA_ICC_EMC); if (IS_ERR(node)) { err = PTR_ERR(node); - goto del_provider; + goto err_msg; } node->name = "External Memory Controller"; @@ -1562,12 +1560,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc) node->name = "External Memory (DRAM)"; icc_node_add(node, &emc->provider); + err = icc_provider_register(&emc->provider); + if (err) + goto remove_nodes; + return 0; remove_nodes: icc_nodes_remove(&emc->provider); -del_provider: - icc_provider_del(&emc->provider); err_msg: dev_err(emc->dev, "failed to initialize ICC: %d\n", err); -- cgit From 3bc57292278a0b6ac4656cad94c14f2453344b57 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 6 Mar 2023 09:36:25 +1100 Subject: md: avoid signed overflow in slot_store() slot_store() uses kstrtouint() to get a slot number, but stores the result in an "int" variable (by casting a pointer). This can result in a negative slot number if the unsigned int value is very large. A negative number means that the slot is empty, but setting a negative slot number this way will not remove the device from the array. I don't think this is a serious problem, but it could cause confusion and it is best to fix it. Reported-by: Dan Carpenter Signed-off-by: NeilBrown Signed-off-by: Song Liu --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index f5480778e2f7..39e49e5d7182 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3128,6 +3128,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) err = kstrtouint(buf, 10, (unsigned int *)&slot); if (err < 0) return err; + if (slot < 0) + /* overflow */ + return -ENOSPC; } if (rdev->mddev->pers && slot == -1) { /* Setting 'slot' on an active array requires also -- cgit From 8b3a149db461d3286d1e211112de3b44ccaeaf71 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 12 Mar 2023 23:00:03 +0100 Subject: efi: earlycon: Reprobe after parsing config tables Commit 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") reorganized the earlycon handling so that all architectures pass the screen_info data via a EFI config table instead of populating struct screen_info directly, as the latter is only possible when the EFI stub is baked into the kernel (and not into the decompressor). However, this means that struct screen_info may not have been populated yet by the time the earlycon probe takes place, and this results in a non-functional early console. So let's probe again right after parsing the config tables and populating struct screen_info. Note that this means that earlycon output starts a bit later than before, and so it may fail to capture issues that occur while doing the early EFI initialization. Fixes: 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") Reported-by: Shawn Guo Tested-by: Shawn Guo Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/earlycon.c | 16 +++++++++++++--- drivers/firmware/efi/efi-init.c | 3 +++ include/linux/efi.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c index f54e6fdf08e2..f80a9af3d16e 100644 --- a/drivers/firmware/efi/earlycon.c +++ b/drivers/firmware/efi/earlycon.c @@ -215,6 +215,14 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num) } } +static bool __initdata fb_probed; + +void __init efi_earlycon_reprobe(void) +{ + if (fb_probed) + setup_earlycon("efifb"); +} + static int __init efi_earlycon_setup(struct earlycon_device *device, const char *opt) { @@ -222,15 +230,17 @@ static int __init efi_earlycon_setup(struct earlycon_device *device, u16 xres, yres; u32 i; - if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) + fb_wb = opt && !strcmp(opt, "ram"); + + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) { + fb_probed = true; return -ENODEV; + } fb_base = screen_info.lfb_base; if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) fb_base |= (u64)screen_info.ext_lfb_base << 32; - fb_wb = opt && !strcmp(opt, "ram"); - si = &screen_info; xres = si->lfb_width; yres = si->lfb_height; diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 2c16080e1f71..ef0820f1a924 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -72,6 +72,9 @@ static void __init init_screen_info(void) if (memblock_is_map_memory(screen_info.lfb_base)) memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size); + + if (IS_ENABLED(CONFIG_EFI_EARLYCON)) + efi_earlycon_reprobe(); } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 04a733f0ba95..7aa62c92185f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -693,6 +693,7 @@ efi_guid_to_str(efi_guid_t *guid, char *out) } extern void efi_init (void); +extern void efi_earlycon_reprobe(void); #ifdef CONFIG_EFI extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ #else -- cgit From 131db499162274858bdbd7b5323a639da4aab86c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Fri, 10 Mar 2023 07:13:56 -0800 Subject: bnxt_en: reset PHC frequency in free-running mode When using a PHC in shared between multiple hosts, the previous frequency value may not be reset and could lead to host being unable to compensate the offset with timecounter adjustments. To avoid such state reset the hardware frequency of PHC to zero on init. Some refactoring is needed to make code readable. Fixes: 85036aee1938 ("bnxt_en: Add a non-real time mode to access NIC clock") Signed-off-by: Vadim Fedorenko Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20230310151356.678059-1-vadfed@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 + drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 56 +++++++++++++++------------ 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 808236dc898b..e2e2c986c82b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6990,11 +6990,9 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) if (flags & FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED) bp->fw_cap |= BNXT_FW_CAP_DCBX_AGENT; } - if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)) { + if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)) bp->flags |= BNXT_FLAG_MULTI_HOST; - if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) - bp->fw_cap &= ~BNXT_FW_CAP_PTP_RTC; - } + if (flags & FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED) bp->fw_cap |= BNXT_FW_CAP_RING_MONITOR; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index dcb09fbe4007..c0628ac1b798 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2000,6 +2000,8 @@ struct bnxt { u32 fw_dbg_cap; #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM) +#define BNXT_PTP_USE_RTC(bp) (!BNXT_MH(bp) && \ + ((bp)->fw_cap & BNXT_FW_CAP_PTP_RTC)) u32 hwrm_spec_code; u16 hwrm_cmd_seq; u16 hwrm_cmd_kong_seq; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 4ec8bba18cdd..a3a3978a4d1c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -63,7 +63,7 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info, ptp_info); u64 ns = timespec64_to_ns(ts); - if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC) + if (BNXT_PTP_USE_RTC(ptp->bp)) return bnxt_ptp_cfg_settime(ptp->bp, ns); spin_lock_bh(&ptp->ptp_lock); @@ -196,7 +196,7 @@ static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta) struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, ptp_info); - if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC) + if (BNXT_PTP_USE_RTC(ptp->bp)) return bnxt_ptp_adjphc(ptp, delta); spin_lock_bh(&ptp->ptp_lock); @@ -205,34 +205,39 @@ static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta) return 0; } +static int bnxt_ptp_adjfine_rtc(struct bnxt *bp, long scaled_ppm) +{ + s32 ppb = scaled_ppm_to_ppb(scaled_ppm); + struct hwrm_port_mac_cfg_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG); + if (rc) + return rc; + + req->ptp_freq_adj_ppb = cpu_to_le32(ppb); + req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB); + rc = hwrm_req_send(bp, req); + if (rc) + netdev_err(bp->dev, + "ptp adjfine failed. rc = %d\n", rc); + return rc; +} + static int bnxt_ptp_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm) { struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg, ptp_info); - struct hwrm_port_mac_cfg_input *req; struct bnxt *bp = ptp->bp; - int rc = 0; - if (!(ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) { - spin_lock_bh(&ptp->ptp_lock); - timecounter_read(&ptp->tc); - ptp->cc.mult = adjust_by_scaled_ppm(ptp->cmult, scaled_ppm); - spin_unlock_bh(&ptp->ptp_lock); - } else { - s32 ppb = scaled_ppm_to_ppb(scaled_ppm); - - rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG); - if (rc) - return rc; + if (BNXT_PTP_USE_RTC(bp)) + return bnxt_ptp_adjfine_rtc(bp, scaled_ppm); - req->ptp_freq_adj_ppb = cpu_to_le32(ppb); - req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB); - rc = hwrm_req_send(ptp->bp, req); - if (rc) - netdev_err(ptp->bp->dev, - "ptp adjfine failed. rc = %d\n", rc); - } - return rc; + spin_lock_bh(&ptp->ptp_lock); + timecounter_read(&ptp->tc); + ptp->cc.mult = adjust_by_scaled_ppm(ptp->cmult, scaled_ppm); + spin_unlock_bh(&ptp->ptp_lock); + return 0; } void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2) @@ -879,7 +884,7 @@ int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg) u64 ns; int rc; - if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) + if (!bp->ptp_cfg || !BNXT_PTP_USE_RTC(bp)) return -ENODEV; if (!phc_cfg) { @@ -932,13 +937,14 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); - if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) { + if (BNXT_PTP_USE_RTC(bp)) { bnxt_ptp_timecounter_init(bp, false); rc = bnxt_ptp_init_rtc(bp, phc_cfg); if (rc) goto out; } else { bnxt_ptp_timecounter_init(bp, true); + bnxt_ptp_adjfine_rtc(bp, 0); } ptp->ptp_info = bnxt_ptp_caps; -- cgit From 22a825c541d775c1dbe7b2402786025acad6727b Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 8 Mar 2023 16:17:12 +0800 Subject: net/smc: fix NULL sndbuf_desc in smc_cdc_tx_handler() When performing a stress test on SMC-R by rmmod mlx5_ib driver during the wrk/nginx test, we found that there is a probability of triggering a panic while terminating all link groups. This issue dues to the race between smc_smcr_terminate_all() and smc_buf_create(). smc_smcr_terminate_all smc_buf_create /* init */ conn->sndbuf_desc = NULL; ... __smc_lgr_terminate smc_conn_kill smc_close_abort smc_cdc_get_slot_and_msg_send __softirqentry_text_start smc_wr_tx_process_cqe smc_cdc_tx_handler READ(conn->sndbuf_desc->len); /* panic dues to NULL sndbuf_desc */ conn->sndbuf_desc = xxx; This patch tries to fix the issue by always to check the sndbuf_desc before send any cdc msg, to make sure that no null pointer is seen during cqe processing. Fixes: 0b29ec643613 ("net/smc: immediate termination for SMCR link groups") Signed-off-by: D. Wythe Reviewed-by: Tony Lu Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/1678263432-17329-1-git-send-email-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_cdc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 53f63bfbaf5f..89105e95b452 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -114,6 +114,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, union smc_host_cursor cfed; int rc; + if (unlikely(!READ_ONCE(conn->sndbuf_desc))) + return -ENOBUFS; + smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; -- cgit From 1a9dc5610ef89d807acdcfbff93a558f341a44da Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Thu, 9 Mar 2023 23:15:56 +0300 Subject: qed/qed_dev: guard against a possible division by zero Previously we would divide total_left_rate by zero if num_vports happened to be 1 because non_requested_count is calculated as num_vports - req_count. Guard against this by validating num_vports at the beginning and returning an error otherwise. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: bcd197c81f63 ("qed: Add vport WFQ configuration APIs") Signed-off-by: Daniil Tatianin Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230309201556.191392-1-d-tatianin@yandex-team.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index d61cd32ec3b6..86a93cac2647 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -5083,6 +5083,11 @@ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn, num_vports = p_hwfn->qm_info.num_vports; + if (num_vports < 2) { + DP_NOTICE(p_hwfn, "Unexpected num_vports: %d\n", num_vports); + return -EINVAL; + } + /* Accounting for the vports which are configured for WFQ explicitly */ for (i = 0; i < num_vports; i++) { u32 tmp_speed; -- cgit From feb03fd11c5616f3a47e4714d2f9917d0f1a2edd Mon Sep 17 00:00:00 2001 From: Arınç ÜNAL Date: Fri, 10 Mar 2023 10:33:37 +0300 Subject: net: dsa: mt7530: remove now incorrect comment regarding port 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove now incorrect comment regarding port 5 as GMAC5. This is supposed to be supported since commit 38f790a80560 ("net: dsa: mt7530: Add support for port 5") under mt7530_setup_port5(). Fixes: 38f790a80560 ("net: dsa: mt7530: Add support for port 5") Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230310073338.5836-1-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index a508402c4ecb..b1a79460df0e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2201,7 +2201,7 @@ mt7530_setup(struct dsa_switch *ds) mt7530_pll_setup(priv); - /* Enable Port 6 only; P5 as GMAC5 which currently is not supported */ + /* Enable port 6 */ val = mt7530_read(priv, MT7530_MHWTRAP); val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS; val |= MHWTRAP_MANUAL; -- cgit From 0b086d76e7b011772b0ac214c6e5fd5816eff2df Mon Sep 17 00:00:00 2001 From: Arınç ÜNAL Date: Fri, 10 Mar 2023 10:33:38 +0300 Subject: net: dsa: mt7530: set PLL frequency and trgmii only when trgmii is used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As my testing on the MCM MT7530 switch on MT7621 SoC shows, setting the PLL frequency does not affect MII modes other than trgmii on port 5 and port 6. So the assumption is that the operation here called "setting the PLL frequency" actually sets the frequency of the TRGMII TX clock. Make it so that it and the rest of the trgmii setup run only when the trgmii mode is used. Tested rgmii and trgmii modes of port 6 on MCM MT7530 on MT7621AT Unielec U7621-06 and standalone MT7530 on MT7623NI Bananapi BPI-R2. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Tested-by: Arınç ÜNAL Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230310073338.5836-2-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 62 ++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b1a79460df0e..c2d81b7a429d 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -430,8 +430,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) switch (interface) { case PHY_INTERFACE_MODE_RGMII: trgint = 0; - /* PLL frequency: 125MHz */ - ncpo1 = 0x0c80; break; case PHY_INTERFACE_MODE_TRGMII: trgint = 1; @@ -462,38 +460,40 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK, P6_INTF_MODE(trgint)); - /* Lower Tx Driving for TRGMII path */ - for (i = 0 ; i < NUM_TRGMII_CTRL ; i++) - mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), - TD_DM_DRVP(8) | TD_DM_DRVN(8)); - - /* Disable MT7530 core and TRGMII Tx clocks */ - core_clear(priv, CORE_TRGMII_GSW_CLK_CG, - REG_GSWCK_EN | REG_TRGMIICK_EN); - - /* Setup the MT7530 TRGMII Tx Clock */ - core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1)); - core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0)); - core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta)); - core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta)); - core_write(priv, CORE_PLL_GROUP4, - RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN | - RG_SYSPLL_BIAS_LPF_EN); - core_write(priv, CORE_PLL_GROUP2, - RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN | - RG_SYSPLL_POSDIV(1)); - core_write(priv, CORE_PLL_GROUP7, - RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) | - RG_LCDDS_PWDB | RG_LCDDS_ISO_EN); - - /* Enable MT7530 core and TRGMII Tx clocks */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, - REG_GSWCK_EN | REG_TRGMIICK_EN); - - if (!trgint) + if (trgint) { + /* Lower Tx Driving for TRGMII path */ + for (i = 0 ; i < NUM_TRGMII_CTRL ; i++) + mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), + TD_DM_DRVP(8) | TD_DM_DRVN(8)); + + /* Disable MT7530 core and TRGMII Tx clocks */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, + REG_GSWCK_EN | REG_TRGMIICK_EN); + + /* Setup the MT7530 TRGMII Tx Clock */ + core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1)); + core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0)); + core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta)); + core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta)); + core_write(priv, CORE_PLL_GROUP4, + RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN | + RG_SYSPLL_BIAS_LPF_EN); + core_write(priv, CORE_PLL_GROUP2, + RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN | + RG_SYSPLL_POSDIV(1)); + core_write(priv, CORE_PLL_GROUP7, + RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) | + RG_LCDDS_PWDB | RG_LCDDS_ISO_EN); + + /* Enable MT7530 core and TRGMII Tx clocks */ + core_set(priv, CORE_TRGMII_GSW_CLK_CG, + REG_GSWCK_EN | REG_TRGMIICK_EN); + } else { for (i = 0 ; i < NUM_TRGMII_CTRL; i++) mt7530_rmw(priv, MT7530_TRGMII_RD(i), RD_TAP_MASK, RD_TAP(16)); + } + return 0; } -- cgit From 512dd354718b98c60d4ff6017ff8c9f66c10d03f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 10 Mar 2023 13:37:09 -0600 Subject: net: ipa: fix a surprising number of bad offsets A recent commit eliminated a hack that adjusted the offset used for many GSI registers. It became possible because we now specify all GSI register offsets explicitly for every version of IPA. Unfortunately, a large number of register offsets were *not* updated as they should have been in that commit. For IPA v4.5+, the offset for every GSI register *except* the two inter-EE interrupt masking registers were supposed to have been reduced by 0xd000. Tested-by: Luca Weiss Tested-by: Dmitry Baryshkov # SM8350-HDK Fixes: 59b12b1d27f3 ("net: ipa: kill gsi->virt_raw") Signed-off-by: Alex Elder Link: https://lore.kernel.org/r/20230310193709.1477102-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/reg/gsi_reg-v4.5.c | 56 +++++++++++++++++++------------------- drivers/net/ipa/reg/gsi_reg-v4.9.c | 44 +++++++++++++++--------------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/drivers/net/ipa/reg/gsi_reg-v4.5.c b/drivers/net/ipa/reg/gsi_reg-v4.5.c index 648b51b88d4e..2900e5c3ff88 100644 --- a/drivers/net/ipa/reg/gsi_reg-v4.5.c +++ b/drivers/net/ipa/reg/gsi_reg-v4.5.c @@ -137,17 +137,17 @@ REG_STRIDE(EV_CH_E_SCRATCH_1, ev_ch_e_scratch_1, 0x0001004c + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(CH_C_DOORBELL_0, ch_c_doorbell_0, - 0x0001e000 + 0x4000 * GSI_EE_AP, 0x08); + 0x00011000 + 0x4000 * GSI_EE_AP, 0x08); REG_STRIDE(EV_CH_E_DOORBELL_0, ev_ch_e_doorbell_0, - 0x0001e100 + 0x4000 * GSI_EE_AP, 0x08); + 0x00011100 + 0x4000 * GSI_EE_AP, 0x08); static const u32 reg_gsi_status_fmask[] = { [ENABLED] = BIT(0), /* Bits 1-31 reserved */ }; -REG_FIELDS(GSI_STATUS, gsi_status, 0x0001f000 + 0x4000 * GSI_EE_AP); +REG_FIELDS(GSI_STATUS, gsi_status, 0x00012000 + 0x4000 * GSI_EE_AP); static const u32 reg_ch_cmd_fmask[] = { [CH_CHID] = GENMASK(7, 0), @@ -155,7 +155,7 @@ static const u32 reg_ch_cmd_fmask[] = { [CH_OPCODE] = GENMASK(31, 24), }; -REG_FIELDS(CH_CMD, ch_cmd, 0x0001f008 + 0x4000 * GSI_EE_AP); +REG_FIELDS(CH_CMD, ch_cmd, 0x00012008 + 0x4000 * GSI_EE_AP); static const u32 reg_ev_ch_cmd_fmask[] = { [EV_CHID] = GENMASK(7, 0), @@ -163,7 +163,7 @@ static const u32 reg_ev_ch_cmd_fmask[] = { [EV_OPCODE] = GENMASK(31, 24), }; -REG_FIELDS(EV_CH_CMD, ev_ch_cmd, 0x0001f010 + 0x4000 * GSI_EE_AP); +REG_FIELDS(EV_CH_CMD, ev_ch_cmd, 0x00012010 + 0x4000 * GSI_EE_AP); static const u32 reg_generic_cmd_fmask[] = { [GENERIC_OPCODE] = GENMASK(4, 0), @@ -172,7 +172,7 @@ static const u32 reg_generic_cmd_fmask[] = { /* Bits 14-31 reserved */ }; -REG_FIELDS(GENERIC_CMD, generic_cmd, 0x0001f018 + 0x4000 * GSI_EE_AP); +REG_FIELDS(GENERIC_CMD, generic_cmd, 0x00012018 + 0x4000 * GSI_EE_AP); static const u32 reg_hw_param_2_fmask[] = { [IRAM_SIZE] = GENMASK(2, 0), @@ -188,58 +188,58 @@ static const u32 reg_hw_param_2_fmask[] = { [GSI_USE_INTER_EE] = BIT(31), }; -REG_FIELDS(HW_PARAM_2, hw_param_2, 0x0001f040 + 0x4000 * GSI_EE_AP); +REG_FIELDS(HW_PARAM_2, hw_param_2, 0x00012040 + 0x4000 * GSI_EE_AP); -REG(CNTXT_TYPE_IRQ, cntxt_type_irq, 0x0001f080 + 0x4000 * GSI_EE_AP); +REG(CNTXT_TYPE_IRQ, cntxt_type_irq, 0x00012080 + 0x4000 * GSI_EE_AP); -REG(CNTXT_TYPE_IRQ_MSK, cntxt_type_irq_msk, 0x0001f088 + 0x4000 * GSI_EE_AP); +REG(CNTXT_TYPE_IRQ_MSK, cntxt_type_irq_msk, 0x00012088 + 0x4000 * GSI_EE_AP); -REG(CNTXT_SRC_CH_IRQ, cntxt_src_ch_irq, 0x0001f090 + 0x4000 * GSI_EE_AP); +REG(CNTXT_SRC_CH_IRQ, cntxt_src_ch_irq, 0x00012090 + 0x4000 * GSI_EE_AP); -REG(CNTXT_SRC_EV_CH_IRQ, cntxt_src_ev_ch_irq, 0x0001f094 + 0x4000 * GSI_EE_AP); +REG(CNTXT_SRC_EV_CH_IRQ, cntxt_src_ev_ch_irq, 0x00012094 + 0x4000 * GSI_EE_AP); REG(CNTXT_SRC_CH_IRQ_MSK, cntxt_src_ch_irq_msk, - 0x0001f098 + 0x4000 * GSI_EE_AP); + 0x00012098 + 0x4000 * GSI_EE_AP); REG(CNTXT_SRC_EV_CH_IRQ_MSK, cntxt_src_ev_ch_irq_msk, - 0x0001f09c + 0x4000 * GSI_EE_AP); + 0x0001209c + 0x4000 * GSI_EE_AP); REG(CNTXT_SRC_CH_IRQ_CLR, cntxt_src_ch_irq_clr, - 0x0001f0a0 + 0x4000 * GSI_EE_AP); + 0x000120a0 + 0x4000 * GSI_EE_AP); REG(CNTXT_SRC_EV_CH_IRQ_CLR, cntxt_src_ev_ch_irq_clr, - 0x0001f0a4 + 0x4000 * GSI_EE_AP); + 0x000120a4 + 0x4000 * GSI_EE_AP); -REG(CNTXT_SRC_IEOB_IRQ, cntxt_src_ieob_irq, 0x0001f0b0 + 0x4000 * GSI_EE_AP); +REG(CNTXT_SRC_IEOB_IRQ, cntxt_src_ieob_irq, 0x000120b0 + 0x4000 * GSI_EE_AP); REG(CNTXT_SRC_IEOB_IRQ_MSK, cntxt_src_ieob_irq_msk, - 0x0001f0b8 + 0x4000 * GSI_EE_AP); + 0x000120b8 + 0x4000 * GSI_EE_AP); REG(CNTXT_SRC_IEOB_IRQ_CLR, cntxt_src_ieob_irq_clr, - 0x0001f0c0 + 0x4000 * GSI_EE_AP); + 0x000120c0 + 0x4000 * GSI_EE_AP); -REG(CNTXT_GLOB_IRQ_STTS, cntxt_glob_irq_stts, 0x0001f100 + 0x4000 * GSI_EE_AP); +REG(CNTXT_GLOB_IRQ_STTS, cntxt_glob_irq_stts, 0x00012100 + 0x4000 * GSI_EE_AP); -REG(CNTXT_GLOB_IRQ_EN, cntxt_glob_irq_en, 0x0001f108 + 0x4000 * GSI_EE_AP); +REG(CNTXT_GLOB_IRQ_EN, cntxt_glob_irq_en, 0x00012108 + 0x4000 * GSI_EE_AP); -REG(CNTXT_GLOB_IRQ_CLR, cntxt_glob_irq_clr, 0x0001f110 + 0x4000 * GSI_EE_AP); +REG(CNTXT_GLOB_IRQ_CLR, cntxt_glob_irq_clr, 0x00012110 + 0x4000 * GSI_EE_AP); -REG(CNTXT_GSI_IRQ_STTS, cntxt_gsi_irq_stts, 0x0001f118 + 0x4000 * GSI_EE_AP); +REG(CNTXT_GSI_IRQ_STTS, cntxt_gsi_irq_stts, 0x00012118 + 0x4000 * GSI_EE_AP); -REG(CNTXT_GSI_IRQ_EN, cntxt_gsi_irq_en, 0x0001f120 + 0x4000 * GSI_EE_AP); +REG(CNTXT_GSI_IRQ_EN, cntxt_gsi_irq_en, 0x00012120 + 0x4000 * GSI_EE_AP); -REG(CNTXT_GSI_IRQ_CLR, cntxt_gsi_irq_clr, 0x0001f128 + 0x4000 * GSI_EE_AP); +REG(CNTXT_GSI_IRQ_CLR, cntxt_gsi_irq_clr, 0x00012128 + 0x4000 * GSI_EE_AP); static const u32 reg_cntxt_intset_fmask[] = { [INTYPE] = BIT(0) /* Bits 1-31 reserved */ }; -REG_FIELDS(CNTXT_INTSET, cntxt_intset, 0x0001f180 + 0x4000 * GSI_EE_AP); +REG_FIELDS(CNTXT_INTSET, cntxt_intset, 0x00012180 + 0x4000 * GSI_EE_AP); -REG_FIELDS(ERROR_LOG, error_log, 0x0001f200 + 0x4000 * GSI_EE_AP); +REG_FIELDS(ERROR_LOG, error_log, 0x00012200 + 0x4000 * GSI_EE_AP); -REG(ERROR_LOG_CLR, error_log_clr, 0x0001f210 + 0x4000 * GSI_EE_AP); +REG(ERROR_LOG_CLR, error_log_clr, 0x00012210 + 0x4000 * GSI_EE_AP); static const u32 reg_cntxt_scratch_0_fmask[] = { [INTER_EE_RESULT] = GENMASK(2, 0), @@ -248,7 +248,7 @@ static const u32 reg_cntxt_scratch_0_fmask[] = { /* Bits 8-31 reserved */ }; -REG_FIELDS(CNTXT_SCRATCH_0, cntxt_scratch_0, 0x0001f400 + 0x4000 * GSI_EE_AP); +REG_FIELDS(CNTXT_SCRATCH_0, cntxt_scratch_0, 0x00012400 + 0x4000 * GSI_EE_AP); static const struct reg *reg_array[] = { [INTER_EE_SRC_CH_IRQ_MSK] = ®_inter_ee_src_ch_irq_msk, diff --git a/drivers/net/ipa/reg/gsi_reg-v4.9.c b/drivers/net/ipa/reg/gsi_reg-v4.9.c index 4bf45d264d6b..8b5d95425a76 100644 --- a/drivers/net/ipa/reg/gsi_reg-v4.9.c +++ b/drivers/net/ipa/reg/gsi_reg-v4.9.c @@ -27,7 +27,7 @@ static const u32 reg_ch_c_cntxt_0_fmask[] = { }; REG_STRIDE_FIELDS(CH_C_CNTXT_0, ch_c_cntxt_0, - 0x0001c000 + 0x4000 * GSI_EE_AP, 0x80); + 0x0000f000 + 0x4000 * GSI_EE_AP, 0x80); static const u32 reg_ch_c_cntxt_1_fmask[] = { [CH_R_LENGTH] = GENMASK(19, 0), @@ -35,11 +35,11 @@ static const u32 reg_ch_c_cntxt_1_fmask[] = { }; REG_STRIDE_FIELDS(CH_C_CNTXT_1, ch_c_cntxt_1, - 0x0001c004 + 0x4000 * GSI_EE_AP, 0x80); + 0x0000f004 + 0x4000 * GSI_EE_AP, 0x80); -REG_STRIDE(CH_C_CNTXT_2, ch_c_cntxt_2, 0x0001c008 + 0x4000 * GSI_EE_AP, 0x80); +REG_STRIDE(CH_C_CNTXT_2, ch_c_cntxt_2, 0x0000f008 + 0x4000 * GSI_EE_AP, 0x80); -REG_STRIDE(CH_C_CNTXT_3, ch_c_cntxt_3, 0x0001c00c + 0x4000 * GSI_EE_AP, 0x80); +REG_STRIDE(CH_C_CNTXT_3, ch_c_cntxt_3, 0x0000f00c + 0x4000 * GSI_EE_AP, 0x80); static const u32 reg_ch_c_qos_fmask[] = { [WRR_WEIGHT] = GENMASK(3, 0), @@ -53,7 +53,7 @@ static const u32 reg_ch_c_qos_fmask[] = { /* Bits 25-31 reserved */ }; -REG_STRIDE_FIELDS(CH_C_QOS, ch_c_qos, 0x0001c05c + 0x4000 * GSI_EE_AP, 0x80); +REG_STRIDE_FIELDS(CH_C_QOS, ch_c_qos, 0x0000f05c + 0x4000 * GSI_EE_AP, 0x80); static const u32 reg_error_log_fmask[] = { [ERR_ARG3] = GENMASK(3, 0), @@ -67,16 +67,16 @@ static const u32 reg_error_log_fmask[] = { }; REG_STRIDE(CH_C_SCRATCH_0, ch_c_scratch_0, - 0x0001c060 + 0x4000 * GSI_EE_AP, 0x80); + 0x0000f060 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(CH_C_SCRATCH_1, ch_c_scratch_1, - 0x0001c064 + 0x4000 * GSI_EE_AP, 0x80); + 0x0000f064 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(CH_C_SCRATCH_2, ch_c_scratch_2, - 0x0001c068 + 0x4000 * GSI_EE_AP, 0x80); + 0x0000f068 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(CH_C_SCRATCH_3, ch_c_scratch_3, - 0x0001c06c + 0x4000 * GSI_EE_AP, 0x80); + 0x0000f06c + 0x4000 * GSI_EE_AP, 0x80); static const u32 reg_ev_ch_e_cntxt_0_fmask[] = { [EV_CHTYPE] = GENMASK(3, 0), @@ -89,23 +89,23 @@ static const u32 reg_ev_ch_e_cntxt_0_fmask[] = { }; REG_STRIDE_FIELDS(EV_CH_E_CNTXT_0, ev_ch_e_cntxt_0, - 0x0001d000 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010000 + 0x4000 * GSI_EE_AP, 0x80); static const u32 reg_ev_ch_e_cntxt_1_fmask[] = { [R_LENGTH] = GENMASK(15, 0), }; REG_STRIDE_FIELDS(EV_CH_E_CNTXT_1, ev_ch_e_cntxt_1, - 0x0001d004 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010004 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_2, ev_ch_e_cntxt_2, - 0x0001d008 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010008 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_3, ev_ch_e_cntxt_3, - 0x0001d00c + 0x4000 * GSI_EE_AP, 0x80); + 0x0001000c + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_4, ev_ch_e_cntxt_4, - 0x0001d010 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010010 + 0x4000 * GSI_EE_AP, 0x80); static const u32 reg_ev_ch_e_cntxt_8_fmask[] = { [EV_MODT] = GENMASK(15, 0), @@ -114,28 +114,28 @@ static const u32 reg_ev_ch_e_cntxt_8_fmask[] = { }; REG_STRIDE_FIELDS(EV_CH_E_CNTXT_8, ev_ch_e_cntxt_8, - 0x0001d020 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010020 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_9, ev_ch_e_cntxt_9, - 0x0001d024 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010024 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_10, ev_ch_e_cntxt_10, - 0x0001d028 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010028 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_11, ev_ch_e_cntxt_11, - 0x0001d02c + 0x4000 * GSI_EE_AP, 0x80); + 0x0001002c + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_12, ev_ch_e_cntxt_12, - 0x0001d030 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010030 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_CNTXT_13, ev_ch_e_cntxt_13, - 0x0001d034 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010034 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_SCRATCH_0, ev_ch_e_scratch_0, - 0x0001d048 + 0x4000 * GSI_EE_AP, 0x80); + 0x00010048 + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(EV_CH_E_SCRATCH_1, ev_ch_e_scratch_1, - 0x0001d04c + 0x4000 * GSI_EE_AP, 0x80); + 0x0001004c + 0x4000 * GSI_EE_AP, 0x80); REG_STRIDE(CH_C_DOORBELL_0, ch_c_doorbell_0, 0x00011000 + 0x4000 * GSI_EE_AP, 0x08); -- cgit From feafeb53140af3cde3fba46b292b15b3a0c0635c Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 14 Feb 2023 11:15:05 -0600 Subject: arm64: dts: imx8dxl-evk: Fix eqos phy reset gpio The deprecated property is named snps,reset-gpio, but this devicetree used snps,reset-gpios instead which results in the reset not being used and the following make dtbs_check error: ./arch/arm64/boot/dts/freescale/imx8dxl-evk.dtb: ethernet@5b050000: 'snps,reset-gpio' is a dependency of 'snps,reset-delays-us' From schema: ./Documentation/devicetree/bindings/net/snps,dwmac.yaml Use the preferred method of defining the reset gpio in the phy node itself. Note that this drops the 10 us pre-delay, but prior this wasn't used at all and a pre-delay doesn't make much sense in this context so it should be fine. Fixes: 8dd495d12374 ("arm64: dts: freescale: add support for i.MX8DXL EVK board") Signed-off-by: Andrew Halaney Acked-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8dxl-evk.dts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts index 1bcf228a22b8..852420349c01 100644 --- a/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8dxl-evk.dts @@ -121,8 +121,6 @@ phy-handle = <ðphy0>; nvmem-cells = <&fec_mac1>; nvmem-cell-names = "mac-address"; - snps,reset-gpios = <&pca6416_1 2 GPIO_ACTIVE_LOW>; - snps,reset-delays-us = <10 20 200000>; status = "okay"; mdio { @@ -136,6 +134,9 @@ eee-broken-1000t; qca,disable-smarteee; qca,disable-hibernation-mode; + reset-gpios = <&pca6416_1 2 GPIO_ACTIVE_LOW>; + reset-assert-us = <20>; + reset-deassert-us = <200000>; vddio-supply = <&vddio0>; vddio0: vddio-regulator { -- cgit From 32f86da7c86b27ebed31c24453a0713f612e43fb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Feb 2023 16:06:26 +0100 Subject: arm64: dts: imx8mm-nitrogen-r2: fix WM8960 clock name The WM8960 Linux driver expects the clock to be named "mclk". Otherwise the clock will be ignored and not prepared/enabled by the driver. Fixes: 40ba2eda0a7b ("arm64: dts: imx8mm-nitrogen-r2: add audio") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts index 6357078185ed..0e8f0d7161ad 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-nitrogen-r2.dts @@ -247,7 +247,7 @@ compatible = "wlf,wm8960"; reg = <0x1a>; clocks = <&clk IMX8MM_CLK_SAI1_ROOT>; - clock-names = "mclk1"; + clock-names = "mclk"; wlf,shared-lrclk; #sound-dai-cells = <0>; }; -- cgit From 1d0d5b917d6af71fa3c9599c3a7198a4175156a5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 17 Feb 2023 20:15:38 +0100 Subject: arm64: dts: imx8mp: Fix LCDIF2 node clock order The 'axi' clock are the bus APB clock, the 'disp_axi' clock are the pixel data AXI clock. The naming is confusing. Fix the clock order. Fixes: 94e6197dadc9 ("arm64: dts: imx8mp: Add LCDIF2 & LDB nodes") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index a19224fe1a6a..2dd60e3252f3 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1131,8 +1131,8 @@ reg = <0x32e90000 0x238>; interrupts = ; clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX_ROOT>, - <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>, - <&clk IMX8MP_CLK_MEDIA_APB_ROOT>; + <&clk IMX8MP_CLK_MEDIA_APB_ROOT>, + <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>; clock-names = "pix", "axi", "disp_axi"; assigned-clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX>, <&clk IMX8MP_VIDEO_PLL1>; -- cgit From 194c3e7d7e1230201fb341f40cfd87760a30e42a Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 23 Feb 2023 07:05:43 +0100 Subject: arm64: dts: imx93: Fix eqos properties 'macirq' is supposed to be listed first. Also only 'snps,clk-csr' is listed in the bindings while 'clk_csr' is only supported for legacy reasons. See commit 83936ea8d8ad2 ("net: stmmac: add a parse for new property 'snps,clk-csr'") Fixes: 1f4263ea6a4b ("arm64: dts: imx93: add eqos support") Signed-off-by: Alexander Stein Reviewed-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 2076f9c9983a..92e93c8af7f7 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -580,9 +580,9 @@ eqos: ethernet@428a0000 { compatible = "nxp,imx93-dwmac-eqos", "snps,dwmac-5.10a"; reg = <0x428a0000 0x10000>; - interrupts = , - ; - interrupt-names = "eth_wake_irq", "macirq"; + interrupts = , + ; + interrupt-names = "macirq", "eth_wake_irq"; clocks = <&clk IMX93_CLK_ENET_QOS_GATE>, <&clk IMX93_CLK_ENET_QOS_GATE>, <&clk IMX93_CLK_ENET_TIMER2>, @@ -595,7 +595,7 @@ <&clk IMX93_CLK_SYS_PLL_PFD0_DIV2>; assigned-clock-rates = <100000000>, <250000000>; intf_mode = <&wakeupmix_gpr 0x28>; - clk_csr = <0>; + snps,clk-csr = <0>; status = "disabled"; }; -- cgit From 3d37f7685d525e58674c23d607020e66d501dcd1 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 26 Feb 2023 21:12:12 +0800 Subject: ARM: dts: imx6sll: e70k02: fix usbotg1 pinctrl usb@2184000: 'pinctrl-0' is a dependency of 'pinctrl-names' Signed-off-by: Peng Fan Fixes: 3bb3fd856505 ("ARM: dts: add Netronix E70K02 board common file") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/e70k02.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/e70k02.dtsi b/arch/arm/boot/dts/e70k02.dtsi index ace3eb8a97b8..4e1bf080eaca 100644 --- a/arch/arm/boot/dts/e70k02.dtsi +++ b/arch/arm/boot/dts/e70k02.dtsi @@ -321,6 +321,7 @@ &usbotg1 { pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; disable-over-current; srp-disable; hnp-disable; -- cgit From 957c04e9784c7c757e8cc293d7fb2a60cdf461b6 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 26 Feb 2023 21:12:13 +0800 Subject: ARM: dts: imx6sll: e60k02: fix usbotg1 pinctrl usb@2184000: 'pinctrl-0' is a dependency of 'pinctrl-names' Signed-off-by: Peng Fan Fixes: c100ea86e6ab ("ARM: dts: add Netronix E60K02 board common file") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/e60k02.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi index 94944cc21931..dd03e3860f97 100644 --- a/arch/arm/boot/dts/e60k02.dtsi +++ b/arch/arm/boot/dts/e60k02.dtsi @@ -311,6 +311,7 @@ &usbotg1 { pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; disable-over-current; srp-disable; hnp-disable; -- cgit From 1cd489e1ada1cffa56bd06fd4609f5a60a985d43 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 26 Feb 2023 21:12:14 +0800 Subject: ARM: dts: imx6sl: tolino-shine2hd: fix usbotg1 pinctrl usb@2184000: 'pinctrl-0' is a dependency of 'pinctrl-names' Signed-off-by: Peng Fan Fixes: 9c7016f1ca6d ("ARM: dts: imx: add devicetree for Tolino Shine 2 HD") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts index da1399057634..815119c12bd4 100644 --- a/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts +++ b/arch/arm/boot/dts/imx6sl-tolino-shine2hd.dts @@ -625,6 +625,7 @@ &usbotg1 { pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg1>; disable-over-current; srp-disable; hnp-disable; -- cgit From 62fb54148cd6eb456ff031be8fb447c98cf0bd9b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 28 Feb 2023 22:52:44 +0100 Subject: arm64: dts: imx8mn: specify #sound-dai-cells for SAI nodes Add #sound-dai-cells properties to SAI nodes. Reviewed-by: Adam Ford Reviewed-by: Fabio Estevam Fixes: 9e9860069725 ("arm64: dts: imx8mn: Add SAI nodes") Signed-off-by: Marek Vasut Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index ed9ac6c5047c..9e0ddd6b7a32 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -296,6 +296,7 @@ sai2: sai@30020000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30020000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI2_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -310,6 +311,7 @@ sai3: sai@30030000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30030000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI3_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -324,6 +326,7 @@ sai5: sai@30050000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30050000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI5_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -340,6 +343,7 @@ sai6: sai@30060000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x30060000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI6_IPG>, <&clk IMX8MN_CLK_DUMMY>, @@ -397,6 +401,7 @@ sai7: sai@300b0000 { compatible = "fsl,imx8mn-sai", "fsl,imx8mq-sai"; reg = <0x300b0000 0x10000>; + #sound-dai-cells = <0>; interrupts = ; clocks = <&clk IMX8MN_CLK_SAI7_IPG>, <&clk IMX8MN_CLK_DUMMY>, -- cgit From 1adab2922c58e7ff4fa9f0b43695079402cce876 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Mon, 6 Mar 2023 16:25:26 +0300 Subject: bus: imx-weim: fix branch condition evaluates to a garbage value If bus type is other than imx50_weim_devtype and have no child devices, variable 'ret' in function weim_parse_dt() will not be initialized, but will be used as branch condition and return value. Fix this by initializing 'ret' with 0. This was discovered with help of clang-analyzer, but the situation is quite possible in real life. Fixes: 52c47b63412b ("bus: imx-weim: improve error handling upon child probe-failure") Signed-off-by: Ivan Bornyakov Cc: stable@vger.kernel.org Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- drivers/bus/imx-weim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 2a6b4f676458..36d42484142a 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -204,8 +204,8 @@ static int weim_parse_dt(struct platform_device *pdev) const struct of_device_id *of_id = of_match_device(weim_id_table, &pdev->dev); const struct imx_weim_devtype *devtype = of_id->data; + int ret = 0, have_child = 0; struct device_node *child; - int ret, have_child = 0; struct weim_priv *priv; void __iomem *base; u32 reg; -- cgit From b3cdf730486b048ca0bf23bef050550d9fd40422 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 8 Mar 2023 11:17:20 +0100 Subject: arm64: dts: imx93: add missing #address-cells and #size-cells to i2c nodes Add them to the SoC .dtsi, so that not every board has to specify them. Fixes: 1225396fefea ("arm64: dts: imx93: add lpi2c nodes") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 92e93c8af7f7..41efd97dd6d6 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -164,6 +164,8 @@ lpi2c1: i2c@44340000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x44340000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C1_GATE>, <&clk IMX93_CLK_BUS_AON>; @@ -174,6 +176,8 @@ lpi2c2: i2c@44350000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x44350000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C2_GATE>, <&clk IMX93_CLK_BUS_AON>; @@ -343,6 +347,8 @@ lpi2c3: i2c@42530000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x42530000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C3_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -353,6 +359,8 @@ lpi2c4: i2c@42540000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x42540000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C4_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -455,6 +463,8 @@ lpi2c5: i2c@426b0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426b0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C5_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -465,6 +475,8 @@ lpi2c6: i2c@426c0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426c0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C6_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -475,6 +487,8 @@ lpi2c7: i2c@426d0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426d0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C7_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; @@ -485,6 +499,8 @@ lpi2c8: i2c@426e0000 { compatible = "fsl,imx93-lpi2c", "fsl,imx7ulp-lpi2c"; reg = <0x426e0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; interrupts = ; clocks = <&clk IMX93_CLK_LPI2C8_GATE>, <&clk IMX93_CLK_BUS_WAKEUP>; -- cgit From 6175b70df95ed3b8fe177cb20b9b00b1f9008cc4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Feb 2023 23:01:15 -0800 Subject: powerpc/pseries: RTAS work area requires GENERIC_ALLOCATOR The RTAS work area allocator uses code that is built by GENERIC_ALLOCATOR, so the PSERIES Kconfig should select the required Kconfig symbol to fix multiple build errors. powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o: in function `.rtas_work_area_allocator_init': rtas-work-area.c:(.init.text+0x288): undefined reference to `.gen_pool_create' powerpc64-linux-ld: rtas-work-area.c:(.init.text+0x2dc): undefined reference to `.gen_pool_set_algo' powerpc64-linux-ld: rtas-work-area.c:(.init.text+0x310): undefined reference to `.gen_pool_add_owner' powerpc64-linux-ld: rtas-work-area.c:(.init.text+0x43c): undefined reference to `.gen_pool_destroy' powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o:(.toc+0x0): undefined reference to `gen_pool_first_fit_order_align' powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o: in function `.__rtas_work_area_alloc': rtas-work-area.c:(.ref.text+0x14c): undefined reference to `.gen_pool_alloc_algo_owner' powerpc64-linux-ld: rtas-work-area.c:(.ref.text+0x238): undefined reference to `.gen_pool_alloc_algo_owner' powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o: in function `.rtas_work_area_free': rtas-work-area.c:(.ref.text+0x44c): undefined reference to `.gen_pool_free_owner' Fixes: 43033bc62d34 ("powerpc/pseries: add RTAS work area allocator") Signed-off-by: Randy Dunlap Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20230223070116.660-2-rdunlap@infradead.org --- arch/powerpc/platforms/pseries/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index b481c5c8bae1..21b22bf16ce6 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -7,6 +7,7 @@ config PPC_PSERIES select OF_DYNAMIC select FORCE_PCI select PCI_MSI + select GENERIC_ALLOCATOR select PPC_XICS select PPC_XIVE_SPAPR select PPC_ICP_NATIVE -- cgit From 934ef33ee75c3846f605f18b65048acd147e3918 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 13 Mar 2023 15:45:48 +0100 Subject: x86/PVH: obtain VGA console info in Dom0 A new platform-op was added to Xen to allow obtaining the same VGA console information PV Dom0 is handed. Invoke the new function and have the output data processed by xen_init_vga(). Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/8f315e92-7bda-c124-71cc-478ab9c5e610@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten_pv.c | 3 ++- arch/x86/xen/enlighten_pvh.c | 13 +++++++++++++ arch/x86/xen/vga.c | 5 ++--- arch/x86/xen/xen-ops.h | 7 ++++--- include/xen/interface/platform.h | 3 +++ 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3c5b52fbe4a7..a9ec8c9f5c5d 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -45,6 +45,6 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o -obj-$(CONFIG_XEN_PV_DOM0) += vga.o +obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5b1379662877..68f5f5d209df 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1389,7 +1389,8 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si) x86_platform.set_legacy_features = xen_dom0_set_legacy_features; - xen_init_vga(info, xen_start_info->console.dom0.info_size); + xen_init_vga(info, xen_start_info->console.dom0.info_size, + &boot_params.screen_info); xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index bcae606bbc5c..1da44aca896c 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -43,6 +43,19 @@ void __init xen_pvh_init(struct boot_params *boot_params) x86_init.oem.banner = xen_banner; xen_efi_init(boot_params); + + if (xen_initial_domain()) { + struct xen_platform_op op = { + .cmd = XENPF_get_dom0_console, + }; + long ret = HYPERVISOR_platform_op(&op); + + if (ret > 0) + xen_init_vga(&op.u.dom0_console, + min(ret * sizeof(char), + sizeof(op.u.dom0_console)), + &boot_params->screen_info); + } } void __init mem_map_via_hcall(struct boot_params *boot_params_p) diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 14ea32e734d5..d97adab8420f 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -9,10 +9,9 @@ #include "xen-ops.h" -void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) +void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size, + struct screen_info *screen_info) { - struct screen_info *screen_info = &boot_params.screen_info; - /* This is drawn from a dump from vgacon:startup in * standard Linux. */ screen_info->orig_video_mode = 3; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a8bb972193d..a10903785a33 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -108,11 +108,12 @@ static inline void xen_uninit_lock_cpu(int cpu) struct dom0_vga_console_info; -#ifdef CONFIG_XEN_PV_DOM0 -void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size); +#ifdef CONFIG_XEN_DOM0 +void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size, + struct screen_info *); #else static inline void __init xen_init_vga(const struct dom0_vga_console_info *info, - size_t size) + size_t size, struct screen_info *si) { } #endif diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index 655d92e803e1..79a443c65ea9 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -483,6 +483,8 @@ struct xenpf_symdata { }; DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata); +#define XENPF_get_dom0_console 64 + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -506,6 +508,7 @@ struct xen_platform_op { struct xenpf_mem_hotadd mem_add; struct xenpf_core_parking core_parking; struct xenpf_symdata symdata; + struct dom0_vga_console_info dom0_console; uint8_t pad[128]; } u; }; -- cgit From b4ee9606378bb9520c94d8b96f0305c3696f5c29 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Wed, 1 Mar 2023 10:21:06 -0600 Subject: drm/amdkfd: Fix BO offset for multi-VMA page migration svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The prange may cross multiple vma. Need remember current dst vram offset in the TTM resource for each migration. v2: squash in warning fix (Alex) Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index de8ce72344fc..391da6acb3e5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -289,7 +289,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch) + dma_addr_t *scratch, uint64_t ttm_res_offset) { uint64_t npages = migrate->npages; struct device *dev = adev->dev; @@ -299,8 +299,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t i, j; int r; - pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, - prange->last); + pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start, + prange->last, ttm_res_offset); src = scratch; dst = (uint64_t *)(scratch + npages); @@ -311,7 +311,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, goto out; } - amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT, + amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); for (i = j = 0; i < npages; i++) { struct page *spage; @@ -398,7 +398,7 @@ out: static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end, uint32_t trigger) + uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; @@ -451,7 +451,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, else pr_debug("0x%lx pages migrated\n", cpages); - r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch); + r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", @@ -499,6 +499,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; + uint64_t ttm_res_offset; unsigned long cpages = 0; long r = 0; @@ -519,6 +520,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + ttm_res_offset = prange->offset << PAGE_SHIFT; for (addr = start; addr < end;) { unsigned long next; @@ -528,13 +530,14 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; } else { cpages += r; } + ttm_res_offset += next - addr; addr = next; } -- cgit From 8eeddc0d4200762063e1c66b9cc63afa7b24ebf0 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Thu, 9 Mar 2023 17:44:55 -0600 Subject: drm/amdkfd: Get prange->offset after svm_range_vram_node_new During miration to vram prange->offset is valid after vram buffer is located, either use old one or allocate a new one. Move svm_range_vram_node_new before migrate for each vma to get valid prange->offset. v2: squash in warning fix Fixes: b4ee9606378b ("drm/amdkfd: Fix BO offset for multi-VMA page migration") Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 391da6acb3e5..54933903bcb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -305,12 +305,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, src = scratch; dst = (uint64_t *)(scratch + npages); - r = svm_range_vram_node_new(adev, prange, true); - if (r) { - dev_dbg(adev->dev, "fail %d to alloc vram\n", r); - goto out; - } - amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); for (i = j = 0; i < npages; i++) { @@ -391,7 +385,7 @@ out_free_vram_pages: migrate->dst[i + 3] = 0; } #endif -out: + return r; } @@ -520,6 +514,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + + r = svm_range_vram_node_new(adev, prange, true); + if (r) { + dev_dbg(adev->dev, "fail %ld to alloc vram\n", r); + return r; + } ttm_res_offset = prange->offset << PAGE_SHIFT; for (addr = start; addr < end;) { @@ -543,6 +543,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, if (cpages) prange->actual_loc = best_loc; + else + svm_range_vram_node_free(prange); return r < 0 ? r : 0; } -- cgit From b2ca5c5d416b4e72d1e9d0293fc720e2d525fd42 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Tue, 7 Mar 2023 16:19:02 -0800 Subject: drm/amdkfd: fix a potential double free in pqm_create_queue Set *q to NULL on errors, otherwise pqm_create_queue would free it again. Signed-off-by: Chia-I Wu Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 5137476ec18e..4236539d9f93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -218,8 +218,8 @@ static int init_user_queue(struct process_queue_manager *pqm, return 0; cleanup: - if (dev->shared_resources.enable_mes) - uninit_queue(*q); + uninit_queue(*q); + *q = NULL; return retval; } -- cgit From ab9bdb1213b4b40942af6a383f555d0c14874c1b Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 1 Mar 2023 10:53:03 +0800 Subject: drm/amd/pm: bump SMU 13.0.4 driver_if header version Align the SMU driver interface version with PMFW to suppress the version mismatch message on driver loading. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h index f77401709d83..2162ecd1057d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 7 +#define PMFW_DRIVER_IF_VERSION 8 typedef struct { int32_t value; @@ -198,7 +198,7 @@ typedef struct { uint16_t SkinTemp; uint16_t DeviceState; uint16_t CurTemp; //[centi-Celsius] - uint16_t spare2; + uint16_t FilterAlphaValue; uint16_t AverageGfxclkFrequency; uint16_t AverageFclkFrequency; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 1c0ae2cb757b..f085cb97a620 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -29,7 +29,7 @@ #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37 -- cgit From a9386ee9681585794dbab95d4ce6826f73d19af6 Mon Sep 17 00:00:00 2001 From: Błażej Szczygieł Date: Sun, 5 Mar 2023 00:44:31 +0100 Subject: drm/amd/pm: Fix sienna cichlid incorrect OD volage after resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always setup overdrive tables after resume. Preserve only some user-defined settings in user_overdrive_table if they're set. Copy restored user_overdrive_table into od_table to get correct values. On cold boot, BTC was triggered and GfxVfCurve was calibrated. We got VfCurve settings (a). On resuming back, BTC will be triggered again and GfxVfCurve will be recalibrated. VfCurve settings (b) got may be different from those of cold boot. So if we reuse those VfCurve settings (a) got on cold boot on suspend, we can run into discrepencies. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1897 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2276 Reviewed-by: Evan Quan Signed-off-by: Błażej Szczygieł Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 43 +++++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 697e98a0a20a..75f18681e984 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2143,16 +2143,9 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu) (OverDriveTable_t *)smu->smu_table.boot_overdrive_table; OverDriveTable_t *user_od_table = (OverDriveTable_t *)smu->smu_table.user_overdrive_table; + OverDriveTable_t user_od_table_bak; int ret = 0; - /* - * For S3/S4/Runpm resume, no need to setup those overdrive tables again as - * - either they already have the default OD settings got during cold bootup - * - or they have some user customized OD settings which cannot be overwritten - */ - if (smu->adev->in_suspend) - return 0; - ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)boot_od_table, false); if (ret) { @@ -2163,7 +2156,23 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu) sienna_cichlid_dump_od_table(smu, boot_od_table); memcpy(od_table, boot_od_table, sizeof(OverDriveTable_t)); - memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); + + /* + * For S3/S4/Runpm resume, we need to setup those overdrive tables again, + * but we have to preserve user defined values in "user_od_table". + */ + if (!smu->adev->in_suspend) { + memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); + smu->user_dpm_profile.user_od = false; + } else if (smu->user_dpm_profile.user_od) { + memcpy(&user_od_table_bak, user_od_table, sizeof(OverDriveTable_t)); + memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); + user_od_table->GfxclkFmin = user_od_table_bak.GfxclkFmin; + user_od_table->GfxclkFmax = user_od_table_bak.GfxclkFmax; + user_od_table->UclkFmin = user_od_table_bak.UclkFmin; + user_od_table->UclkFmax = user_od_table_bak.UclkFmax; + user_od_table->VddGfxOffset = user_od_table_bak.VddGfxOffset; + } return 0; } @@ -2373,6 +2382,20 @@ static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu, return ret; } +static int sienna_cichlid_restore_user_od_settings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table = table_context->overdrive_table; + OverDriveTable_t *user_od_table = table_context->user_overdrive_table; + int res; + + res = smu_v11_0_restore_user_od_settings(smu); + if (res == 0) + memcpy(od_table, user_od_table, sizeof(OverDriveTable_t)); + + return res; +} + static int sienna_cichlid_run_btc(struct smu_context *smu) { int res; @@ -4400,7 +4423,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .set_default_od_settings = sienna_cichlid_set_default_od_settings, .od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table, - .restore_user_od_settings = smu_v11_0_restore_user_od_settings, + .restore_user_od_settings = sienna_cichlid_restore_user_od_settings, .run_btc = sienna_cichlid_run_btc, .set_power_source = smu_v11_0_set_power_source, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, -- cgit From d71e38df3b730a17ab6b25cabb2ccfe8a7f04385 Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Tue, 28 Feb 2023 18:48:41 +0800 Subject: drm/amdgpu/vcn: custom video info caps for sriov for sriov, we added a new flag to indicate av1 support, this will override the original caps info. Signed-off-by: Jane Jian Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 3 +- drivers/gpu/drm/amd/amdgpu/soc21.c | 103 +++++++++++++++++++++++++--- 3 files changed, 99 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b9e9480448af..4f7bab52282a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -124,6 +124,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_PP_ONE_VF = (1 << 4), /* Indirect Reg Access enabled */ AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), + /* AV1 Support MODE*/ + AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -322,6 +324,8 @@ static inline bool is_virtual_machine(void) ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) #define amdgpu_sriov_is_normal(adev) \ ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) +#define amdgpu_sriov_is_av1_support(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 6c97148ca0ed..24d42d24e6a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -93,7 +93,8 @@ union amd_sriov_msg_feature_flags { uint32_t mm_bw_management : 1; uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; - uint32_t reserved : 26; + uint32_t av1_support : 1; + uint32_t reserved : 25; } flags; uint32_t all; }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 061793d390cc..c82b3a7ea5f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -102,6 +102,59 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1, }; +/* SRIOV SOC21, not const since data is controlled by host */ +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0), + .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1), + .codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1, +}; + +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0), + .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0, +}; + +static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = { + .codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1), + .codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1, +}; + static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -112,16 +165,31 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): - if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) { - if (encode) - *codecs = &vcn_4_0_0_video_codecs_encode_vcn1; - else - *codecs = &vcn_4_0_0_video_codecs_decode_vcn1; + if (amdgpu_sriov_vf(adev)) { + if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) || + !amdgpu_sriov_is_av1_support(adev)) { + if (encode) + *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1; + else + *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1; + } else { + if (encode) + *codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0; + else + *codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0; + } } else { - if (encode) - *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; - else - *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; + if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) { + if (encode) + *codecs = &vcn_4_0_0_video_codecs_encode_vcn1; + else + *codecs = &vcn_4_0_0_video_codecs_decode_vcn1; + } else { + if (encode) + *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; + else + *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; + } } return 0; default: @@ -730,8 +798,23 @@ static int soc21_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); + if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) || + !amdgpu_sriov_is_av1_support(adev)) { + amdgpu_virt_update_sriov_video_codec(adev, + sriov_vcn_4_0_0_video_codecs_encode_array_vcn1, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1), + sriov_vcn_4_0_0_video_codecs_decode_array_vcn1, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1)); + } else { + amdgpu_virt_update_sriov_video_codec(adev, + sriov_vcn_4_0_0_video_codecs_encode_array_vcn0, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0), + sriov_vcn_4_0_0_video_codecs_decode_array_vcn0, + ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0)); + } + } return 0; } -- cgit From 9da050b0d9e04439d225a2ec3044af70cdfb3933 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Wed, 8 Mar 2023 13:37:24 -0800 Subject: drm/amdkfd: fix potential kgd_mem UAFs kgd_mem pointers returned by kfd_process_device_translate_handle are only guaranteed to be valid while p->mutex is held. As soon as the mutex is unlocked, another thread can free the BO. Signed-off-by: Chia-I Wu Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a0e30f21e12e..de310ed367ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1312,14 +1312,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, args->n_success = i+1; } - mutex_unlock(&p->mutex); - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } + mutex_unlock(&p->mutex); + /* Flush TLBs after waiting for the page table updates to complete */ for (i = 0; i < args->n_devices; i++) { peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); @@ -1335,9 +1335,9 @@ get_process_device_data_failed: bind_process_to_device_failed: get_mem_obj_from_handle_failed: map_memory_to_gpu_failed: +sync_memory_failed: mutex_unlock(&p->mutex); copy_from_user_failed: -sync_memory_failed: kfree(devices_arr); return err; @@ -1351,6 +1351,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, void *mem; long err = 0; uint32_t *devices_arr = NULL, i; + bool flush_tlb; if (!args->n_devices) { pr_debug("Device IDs array empty\n"); @@ -1403,16 +1404,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } args->n_success = i+1; } - mutex_unlock(&p->mutex); - if (kfd_flush_tlb_after_unmap(pdd->dev)) { + flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev); + if (flush_tlb) { err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } + } + mutex_unlock(&p->mutex); + if (flush_tlb) { /* Flush TLBs after waiting for the page table updates to complete */ for (i = 0; i < args->n_devices; i++) { peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); @@ -1428,9 +1432,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, bind_process_to_device_failed: get_mem_obj_from_handle_failed: unmap_memory_from_gpu_failed: +sync_memory_failed: mutex_unlock(&p->mutex); copy_from_user_failed: -sync_memory_failed: kfree(devices_arr); return err; } -- cgit From 728cefa53a36ba378ed4a7f31a0c08289687d824 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 17 Feb 2023 16:08:21 -0500 Subject: drm/amd/display: Fix HDCP failing to enable after suspend [Why] On resume some displays are not ready for HDCP, so they will fail if we start the hdcp authentintication too soon. Add a delay so that the displays can be ready before we start. NOTE: Previoulsy this delay was set to 3 seconds but it was causing issues with compliance, 2 seconds should enough for compliance and the s3 resume case. [How] Change the Delay to 2 seconds. Reviewed-by: Aurabindo Pillai Acked-by: Qingqing Zhuo Signed-off-by: Bhawanpreet Lakha Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 8e572f07ec47..4abfd2c9679f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -561,7 +561,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) link->dp.mst_enabled = config->mst_enabled; link->dp.usb4_enabled = config->usb4_enabled; display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION; - link->adjust.auth_delay = 0; + link->adjust.auth_delay = 2; link->adjust.hdcp1.disable = 0; conn_state = aconnector->base.state; -- cgit From 3fadda5de8073e2cb65744803a6941736411d55b Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Thu, 9 Mar 2023 10:02:45 +0800 Subject: drm/amdgpu: move poll enabled/disable into non DC path Some amd asics having reliable hotplug support don't call drm_kms_helper_poll_init in driver init sequence. However, due to the unified suspend/resume path for all asics, because the output_poll_work->func is not set for these asics, a warning arrives when suspending. [ 90.656049] [ 90.656050] ? console_unlock+0x4d/0x100 [ 90.656053] ? __irq_work_queue_local+0x27/0x60 [ 90.656056] ? irq_work_queue+0x2b/0x50 [ 90.656057] ? __wake_up_klogd+0x40/0x60 [ 90.656059] __cancel_work_timer+0xed/0x180 [ 90.656061] drm_kms_helper_poll_disable.cold+0x1f/0x2c [drm_kms_helper] [ 90.656072] amdgpu_device_suspend+0x81/0x170 [amdgpu] [ 90.656180] amdgpu_pmops_runtime_suspend+0xb5/0x1b0 [amdgpu] [ 90.656269] pci_pm_runtime_suspend+0x61/0x1b0 drm_kms_helper_poll_enable/disable is valid when poll_init is called in amdgpu code, which is only used in non DC path. So move such codes into non-DC path code to get rid of such warnings. v1: introduce use_kms_poll flag in amdgpu as the poll stuff check v2: use dc_enabled as the flag to simply code v3: move code into non DC path instead of relying on any flag Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2411 Fixes: a4e771729a51 ("drm/probe_helper: sort out poll_running vs poll_enabled") Reported-by: Bert Karwatzki Suggested-by: Dmitry Baryshkov Suggested-by: Alex Deucher Signed-off-by: Guchun Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c4a4e2fe6681..da5b0258a237 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4145,8 +4145,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) DRM_WARN("smart shift update failed\n"); - drm_kms_helper_poll_disable(dev); - if (fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); @@ -4243,8 +4241,6 @@ exit: if (fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); - drm_kms_helper_poll_enable(dev); - amdgpu_ras_resume(adev); if (adev->mode_info.num_crtc) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 503f89a766c3..d60fe7eb5579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1618,6 +1618,8 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) struct drm_connector_list_iter iter; int r; + drm_kms_helper_poll_disable(dev); + /* turn off display hw */ drm_modeset_lock_all(dev); drm_connector_list_iter_begin(dev, &iter); @@ -1694,6 +1696,8 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) drm_modeset_unlock_all(dev); + drm_kms_helper_poll_enable(dev); + return 0; } -- cgit From 751281c55579f0cb0e56c9797d4663f689909681 Mon Sep 17 00:00:00 2001 From: Benjamin Cheng Date: Sun, 12 Mar 2023 20:47:39 -0400 Subject: drm/amd/display: Write to correct dirty_rect When FB_DAMAGE_CLIPS are provided in a non-MPO scenario, the loop does not use the counter i. This causes the fill_dc_dity_rect() to always fill dirty_rects[0], causing graphical artifacts when a damage clip aware DRM client sends more than 1 damage clip. Instead, use the flip_addrs->dirty_rect_count which is incremented by fill_dc_dirty_rect() on a successful fill. Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support") Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2453 Signed-off-by: Benjamin Cheng Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 009ef917dad4..32abbafd43fa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5105,9 +5105,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, for (; flip_addrs->dirty_rect_count < num_clips; clips++) fill_dc_dirty_rect(new_plane_state->plane, - &dirty_rects[i], clips->x1, - clips->y1, clips->x2 - clips->x1, - clips->y2 - clips->y1, + &dirty_rects[flip_addrs->dirty_rect_count], + clips->x1, clips->y1, + clips->x2 - clips->x1, clips->y2 - clips->y1, &flip_addrs->dirty_rect_count, false); return; -- cgit From 45aa07fa832412f1de99194f37fd847915d7e0f6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Mar 2023 22:45:59 -0500 Subject: drm/amdgpu/nv: fix codec array for SR_IOV Copy paste error. Fixes: 384334120b66 ("drm/amdgpu/nv: don't expose AV1 if VCN0 is harvested") Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4454 Cc: Jiapeng Chong Acked-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 855d390c41de..22e25ca285f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1055,8 +1055,8 @@ static int nv_common_late_init(void *handle) amdgpu_virt_update_sriov_video_codec(adev, sriov_sc_video_codecs_encode_array, ARRAY_SIZE(sriov_sc_video_codecs_encode_array), - sriov_sc_video_codecs_decode_array_vcn1, - ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1)); + sriov_sc_video_codecs_decode_array_vcn0, + ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0)); } } -- cgit From 542a56e8eb4467ae654eefab31ff194569db39cd Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Sun, 12 Mar 2023 13:51:00 -0300 Subject: drm/amdgpu/vcn: Disable indirect SRAM on Vangogh broken BIOSes The VCN firmware loading path enables the indirect SRAM mode if it's advertised as supported. We might have some cases of FW issues that prevents this mode to working properly though, ending-up in a failed probe. An example below, observed in the Steam Deck: [...] [drm] failed to load ucode VCN0_RAM(0x3A) [drm] psp gfx command LOAD_IP_FW(0x6) failed and response status is (0xFFFF0000) amdgpu 0000:04:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring vcn_dec_0 test failed (-110) [drm:amdgpu_device_init.cold [amdgpu]] *ERROR* hw_init of IP block failed -110 amdgpu 0000:04:00.0: amdgpu: amdgpu_device_ip_init failed amdgpu 0000:04:00.0: amdgpu: Fatal error during GPU init [...] Disabling the VCN block circumvents this, but it's a very invasive workaround that turns off the entire feature. So, let's add a quirk on VCN loading that checks for known problematic BIOSes on Vangogh, so we can proactively disable the indirect SRAM mode and allow the HW proper probe and VCN IP block to work fine. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2385 Fixes: 82132ecc5432 ("drm/amdgpu: enable Vangogh VCN indirect sram mode") Cc: stable@vger.kernel.org Cc: James Zhu Cc: Leo Liu Signed-off-by: Guilherme G. Piccoli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 25217b05c0ea..e7974de8b035 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -114,6 +115,24 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; + /* + * Some Steam Deck's BIOS versions are incompatible with the + * indirect SRAM mode, leading to amdgpu being unable to get + * properly probed (and even potentially crashing the kernel). + * Hence, check for these versions here - notice this is + * restricted to Vangogh (Deck's APU). + */ + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) { + const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION); + + if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) || + !strncmp("F7A0114", bios_ver, 7))) { + adev->vcn.indirect_sram = false; + dev_info(adev->dev, + "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver); + } + } + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); -- cgit From 34e0a279a993debaff03158fc2fbf6a00c093643 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Mar 2023 10:30:02 +0100 Subject: block: do not reverse request order when flushing plug list Commit 26fed4ac4eab ("block: flush plug based on hardware and software queue order") changed flushing of plug list to submit requests one device at a time. However while doing that it also started using list_add_tail() instead of list_add() used previously thus effectively submitting requests in reverse order. Also when forming a rq_list with remaining requests (in case two or more devices are used), we effectively reverse the ordering of the plug list for each device we process. Submitting requests in reverse order has negative impact on performance for rotational disks (when BFQ is not in use). We observe 10-25% regression in random 4k write throughput, as well as ~20% regression in MariaDB OLTP benchmark on rotational storage on btrfs filesystem. Fix the problem by preserving ordering of the plug list when inserting requests into the queuelist as well as by appending to requeue_list instead of prepending to it. Fixes: 26fed4ac4eab ("block: flush plug based on hardware and software queue order") Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230313093002.11756-1-jack@suse.cz Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++-- include/linux/blk-mq.h | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index d0cb2ef18fe2..cf1a39adf9a5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2725,6 +2725,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) struct blk_mq_hw_ctx *this_hctx = NULL; struct blk_mq_ctx *this_ctx = NULL; struct request *requeue_list = NULL; + struct request **requeue_lastp = &requeue_list; unsigned int depth = 0; LIST_HEAD(list); @@ -2735,10 +2736,10 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched) this_hctx = rq->mq_hctx; this_ctx = rq->mq_ctx; } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { - rq_list_add(&requeue_list, rq); + rq_list_add_tail(&requeue_lastp, rq); continue; } - list_add_tail(&rq->queuelist, &list); + list_add(&rq->queuelist, &list); depth++; } while (!rq_list_empty(plug->mq_list)); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index dd5ce1137f04..de0b0c3e7395 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -228,6 +228,12 @@ static inline unsigned short req_get_ioprio(struct request *req) *(listptr) = rq; \ } while (0) +#define rq_list_add_tail(lastpptr, rq) do { \ + (rq)->rq_next = NULL; \ + **(lastpptr) = rq; \ + *(lastpptr) = &rq->rq_next; \ +} while (0) + #define rq_list_pop(listptr) \ ({ \ struct request *__req = NULL; \ -- cgit From 28e8cabe80f3e6e3c98121576eda898eeb20f1b1 Mon Sep 17 00:00:00 2001 From: Kristian Overskeid Date: Tue, 7 Mar 2023 14:32:29 +0100 Subject: net: hsr: Don't log netdev_err message on unknown prp dst node If no frames has been exchanged with a node for HSR_NODE_FORGET_TIME, the node will be deleted from the node_db list. If a frame is sent to the node after it is deleted, a netdev_err message for each slave interface is produced. This should not happen with dan nodes because of supervision frames, but can happen often with san nodes, which clutters the kernel log. Since the hsr protocol does not support sans, this is only relevant for the prp protocol. Signed-off-by: Kristian Overskeid Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 00db74d96583..865eda39d601 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst) { - if (net_ratelimit()) + if (net_ratelimit() && port->hsr->prot_version != PRP_V1) netdev_err(skb->dev, "%s: Unknown node\n", __func__); return; } -- cgit From 9026c0bf233db53b86f74f4c620715e94eb32a09 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 13 Mar 2023 00:49:24 +0000 Subject: ALSA: asihpi: check pao in control_message() control_message() might be called with pao = NULL. Here indicates control_message() as sample. (B) static void control_message(struct hpi_adapter_obj *pao, ...) { ^^^ struct hpi_hw_obj *phw = pao->priv; ... ^^^ } (A) void _HPI_6205(struct hpi_adapter_obj *pao, ...) { ^^^ ... case HPI_OBJ_CONTROL: (B) control_message(pao, phm, phr); break; ^^^ ... } void HPI_6205(...) { ... (A) _HPI_6205(NULL, phm, phr); ... ^^^^ } Therefore, We will get too many warning via cppcheck, like below sound/pci/asihpi/hpi6205.c:238:27: warning: Possible null pointer dereference: pao [nullPointer] struct hpi_hw_obj *phw = pao->priv; ^ sound/pci/asihpi/hpi6205.c:433:13: note: Calling function '_HPI_6205', 1st argument 'NULL' value is 0 _HPI_6205(NULL, phm, phr); ^ sound/pci/asihpi/hpi6205.c:401:20: note: Calling function 'control_message', 1st argument 'pao' value is 0 control_message(pao, phm, phr); ^ Set phr->error like many functions doing, and don't call _HPI_6205() with NULL. Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87ttypeaqz.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpi6205.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/asihpi/hpi6205.c b/sound/pci/asihpi/hpi6205.c index 27e11b5f70b9..c7d7eff86727 100644 --- a/sound/pci/asihpi/hpi6205.c +++ b/sound/pci/asihpi/hpi6205.c @@ -430,7 +430,7 @@ void HPI_6205(struct hpi_message *phm, struct hpi_response *phr) pao = hpi_find_adapter(phm->adapter_index); } else { /* subsys messages don't address an adapter */ - _HPI_6205(NULL, phm, phr); + phr->error = HPI_ERROR_INVALID_OBJ_INDEX; return; } -- cgit From 98e5eb110095ec77cb6d775051d181edbf9cd3cf Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 13 Mar 2023 00:50:28 +0000 Subject: ALSA: hda/ca0132: fixup buffer overrun at tuning_ctl_set() tuning_ctl_set() might have buffer overrun at (X) if it didn't break from loop by matching (A). static int tuning_ctl_set(...) { for (i = 0; i < TUNING_CTLS_COUNT; i++) (A) if (nid == ca0132_tuning_ctls[i].nid) break; snd_hda_power_up(...); (X) dspio_set_param(..., ca0132_tuning_ctls[i].mid, ...); snd_hda_power_down(...); ^ return 1; } We will get below error by cppcheck sound/pci/hda/patch_ca0132.c:4229:2: note: After for loop, i has value 12 for (i = 0; i < TUNING_CTLS_COUNT; i++) ^ sound/pci/hda/patch_ca0132.c:4234:43: note: Array index out of bounds dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20, ^ This patch cares non match case. Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87sfe9eap7.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index acde4cd58785..099722ebaed8 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -4228,8 +4228,10 @@ static int tuning_ctl_set(struct hda_codec *codec, hda_nid_t nid, for (i = 0; i < TUNING_CTLS_COUNT; i++) if (nid == ca0132_tuning_ctls[i].nid) - break; + goto found; + return -EINVAL; +found: snd_hda_power_up(codec); dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20, ca0132_tuning_ctls[i].req, -- cgit From ccb820dc7d2236b1af0d54ae038a27b5b6d5ae5a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 13 Mar 2023 15:12:29 -0700 Subject: fscrypt: destroy keyring after security_sb_delete() fscrypt_destroy_keyring() must be called after all potentially-encrypted inodes were evicted; otherwise it cannot safely destroy the keyring. Since inodes that are in-use by the Landlock LSM don't get evicted until security_sb_delete(), this means that fscrypt_destroy_keyring() must be called *after* security_sb_delete(). This fixes a WARN_ON followed by a NULL dereference, only possible if Landlock was being used on encrypted files. Fixes: d7e7b9af104c ("fscrypt: stop using keyrings subsystem for fscrypt_master_key") Cc: stable@vger.kernel.org Reported-by: syzbot+93e495f6a4f748827c88@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/00000000000044651705f6ca1e30@google.com Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20230313221231.272498-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/super.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/super.c b/fs/super.c index 84332d5cb817..04bc62ab7dfe 100644 --- a/fs/super.c +++ b/fs/super.c @@ -475,13 +475,22 @@ void generic_shutdown_super(struct super_block *sb) cgroup_writeback_umount(); - /* evict all inodes with zero refcount */ + /* Evict all inodes with zero refcount. */ evict_inodes(sb); - /* only nonzero refcount inodes can have marks */ + + /* + * Clean up and evict any inodes that still have references due + * to fsnotify or the security policy. + */ fsnotify_sb_delete(sb); - fscrypt_destroy_keyring(sb); security_sb_delete(sb); + /* + * Now that all potentially-encrypted inodes have been evicted, + * the fscrypt keyring can be destroyed. + */ + fscrypt_destroy_keyring(sb); + if (sb->s_dio_done_wq) { destroy_workqueue(sb->s_dio_done_wq); sb->s_dio_done_wq = NULL; -- cgit From 42da2c00b91486980a724c05c29818a7e60a067e Mon Sep 17 00:00:00 2001 From: Xujun Leng Date: Sun, 12 Mar 2023 15:14:23 +0800 Subject: docs: process: typo fix In the second paragraph of section "Respond to review comments", there is a spelling mistake: "aganst" should be "against". Signed-off-by: Xujun Leng Link: https://lore.kernel.org/r/20230312071423.3042-1-lengxujun2007@126.com Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index eac7167dce83..69ce64e03c70 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -320,7 +320,7 @@ for their time. Code review is a tiring and time-consuming process, and reviewers sometimes get grumpy. Even in that case, though, respond politely and address the problems they have pointed out. When sending a next version, add a ``patch changelog`` to the cover letter or to individual patches -explaining difference aganst previous submission (see +explaining difference against previous submission (see :ref:`the_canonical_patch_format`). See Documentation/process/email-clients.rst for recommendations on email -- cgit From d7ba3657d5162bd551e5c653f67f941c94a7dc0a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Mar 2023 10:58:57 +0100 Subject: docs: vfio: fix header path The text points to a different header file, fix by changing the path to "uapi". Signed-off-by: Jiri Pirko Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20230310095857.985814-1-jiri@resnulli.us Signed-off-by: Jonathan Corbet --- Documentation/driver-api/vfio.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst index 50b690f7f663..68abc089d6dd 100644 --- a/Documentation/driver-api/vfio.rst +++ b/Documentation/driver-api/vfio.rst @@ -242,7 +242,7 @@ group and can access them as follows:: VFIO User API ------------------------------------------------------------------------------- -Please see include/linux/vfio.h for complete API documentation. +Please see include/uapi/linux/vfio.h for complete API documentation. VFIO bus driver API ------------------------------------------------------------------------------- -- cgit From 9c88ea00fef03031ce6554531e89be82f6a42835 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 9 Mar 2023 13:58:52 -0500 Subject: NFS: Fix /proc/PID/io read_bytes for buffered reads Prior to commit 8786fde8421c ("Convert NFS from readpages to readahead"), nfs_readpages() used the old mm interface read_cache_pages() which called task_io_account_read() for each NFS page read. After this commit, nfs_readpages() is converted to nfs_readahead(), which now uses the new mm interface readahead_page(). The new interface requires callers to call task_io_account_read() themselves. In addition, to nfs_readahead() task_io_account_read() should also be called from nfs_read_folio(). Fixes: 8786fde8421c ("Convert NFS from readpages to readahead") Link: https://lore.kernel.org/linux-nfs/CAPt2mGNEYUk5u8V4abe=5MM5msZqmvzCVrtCP4Qw1n=gCHCnww@mail.gmail.com/ Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker --- fs/nfs/read.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c380cff4108e..e90988591df4 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -337,6 +338,7 @@ int nfs_read_folio(struct file *file, struct folio *folio) trace_nfs_aop_readpage(inode, folio); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); + task_io_account_read(folio_size(folio)); /* * Try to flush any pending writes to the file.. @@ -393,6 +395,7 @@ void nfs_readahead(struct readahead_control *ractl) trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages); nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); + task_io_account_read(readahead_length(ractl)); ret = -ESTALE; if (NFS_STALE(inode)) -- cgit From 044b14b51d78a549664efac3a5a4c9edd1211b79 Mon Sep 17 00:00:00 2001 From: Nikita Romanyuk Date: Sat, 25 Feb 2023 10:12:28 +0300 Subject: drivers: video: logo: fix code style issues in pnmtologo.c Signed-off-by: Nikita Romanyuk Signed-off-by: Helge Deller --- drivers/video/logo/pnmtologo.c | 667 +++++++++++++++++++++-------------------- 1 file changed, 334 insertions(+), 333 deletions(-) diff --git a/drivers/video/logo/pnmtologo.c b/drivers/video/logo/pnmtologo.c index 4718d7895f0b..78cb95f3cfce 100644 --- a/drivers/video/logo/pnmtologo.c +++ b/drivers/video/logo/pnmtologo.c @@ -1,4 +1,3 @@ - /* * Convert a logo in ASCII PNM format to C source suitable for inclusion in * the Linux kernel @@ -34,37 +33,37 @@ static FILE *out; #define LINUX_LOGO_GRAY256 4 /* 256 levels grayscale */ static const char *logo_types[LINUX_LOGO_GRAY256+1] = { - [LINUX_LOGO_MONO] = "LINUX_LOGO_MONO", - [LINUX_LOGO_VGA16] = "LINUX_LOGO_VGA16", - [LINUX_LOGO_CLUT224] = "LINUX_LOGO_CLUT224", - [LINUX_LOGO_GRAY256] = "LINUX_LOGO_GRAY256" + [LINUX_LOGO_MONO] = "LINUX_LOGO_MONO", + [LINUX_LOGO_VGA16] = "LINUX_LOGO_VGA16", + [LINUX_LOGO_CLUT224] = "LINUX_LOGO_CLUT224", + [LINUX_LOGO_GRAY256] = "LINUX_LOGO_GRAY256" }; #define MAX_LINUX_LOGO_COLORS 224 struct color { - unsigned char red; - unsigned char green; - unsigned char blue; + unsigned char red; + unsigned char green; + unsigned char blue; }; static const struct color clut_vga16[16] = { - { 0x00, 0x00, 0x00 }, - { 0x00, 0x00, 0xaa }, - { 0x00, 0xaa, 0x00 }, - { 0x00, 0xaa, 0xaa }, - { 0xaa, 0x00, 0x00 }, - { 0xaa, 0x00, 0xaa }, - { 0xaa, 0x55, 0x00 }, - { 0xaa, 0xaa, 0xaa }, - { 0x55, 0x55, 0x55 }, - { 0x55, 0x55, 0xff }, - { 0x55, 0xff, 0x55 }, - { 0x55, 0xff, 0xff }, - { 0xff, 0x55, 0x55 }, - { 0xff, 0x55, 0xff }, - { 0xff, 0xff, 0x55 }, - { 0xff, 0xff, 0xff }, + { 0x00, 0x00, 0x00 }, + { 0x00, 0x00, 0xaa }, + { 0x00, 0xaa, 0x00 }, + { 0x00, 0xaa, 0xaa }, + { 0xaa, 0x00, 0x00 }, + { 0xaa, 0x00, 0xaa }, + { 0xaa, 0x55, 0x00 }, + { 0xaa, 0xaa, 0xaa }, + { 0x55, 0x55, 0x55 }, + { 0x55, 0x55, 0xff }, + { 0x55, 0xff, 0x55 }, + { 0x55, 0xff, 0xff }, + { 0xff, 0x55, 0x55 }, + { 0xff, 0x55, 0xff }, + { 0xff, 0xff, 0x55 }, + { 0xff, 0xff, 0xff }, }; @@ -77,438 +76,440 @@ static unsigned int logo_clutsize; static int is_plain_pbm = 0; static void die(const char *fmt, ...) - __attribute__ ((noreturn)) __attribute ((format (printf, 1, 2))); -static void usage(void) __attribute ((noreturn)); +__attribute__((noreturn)) __attribute((format (printf, 1, 2))); +static void usage(void) __attribute((noreturn)); static unsigned int get_number(FILE *fp) { - int c, val; - - /* Skip leading whitespace */ - do { - c = fgetc(fp); - if (c == EOF) - die("%s: end of file\n", filename); - if (c == '#') { - /* Ignore comments 'till end of line */ - do { + int c, val; + + /* Skip leading whitespace */ + do { + c = fgetc(fp); + if (c == EOF) + die("%s: end of file\n", filename); + if (c == '#') { + /* Ignore comments 'till end of line */ + do { + c = fgetc(fp); + if (c == EOF) + die("%s: end of file\n", filename); + } while (c != '\n'); + } + } while (isspace(c)); + + /* Parse decimal number */ + val = 0; + while (isdigit(c)) { + val = 10*val+c-'0'; + /* some PBM are 'broken'; GiMP for example exports a PBM without space + * between the digits. This is Ok cause we know a PBM can only have a '1' + * or a '0' for the digit. + */ + if (is_plain_pbm) + break; c = fgetc(fp); if (c == EOF) - die("%s: end of file\n", filename); - } while (c != '\n'); + die("%s: end of file\n", filename); } - } while (isspace(c)); - - /* Parse decimal number */ - val = 0; - while (isdigit(c)) { - val = 10*val+c-'0'; - /* some PBM are 'broken'; GiMP for example exports a PBM without space - * between the digits. This is Ok cause we know a PBM can only have a '1' - * or a '0' for the digit. */ - if (is_plain_pbm) - break; - c = fgetc(fp); - if (c == EOF) - die("%s: end of file\n", filename); - } - return val; + return val; } static unsigned int get_number255(FILE *fp, unsigned int maxval) { - unsigned int val = get_number(fp); - return (255*val+maxval/2)/maxval; + unsigned int val = get_number(fp); + + return (255*val+maxval/2)/maxval; } static void read_image(void) { - FILE *fp; - unsigned int i, j; - int magic; - unsigned int maxval; - - /* open image file */ - fp = fopen(filename, "r"); - if (!fp) - die("Cannot open file %s: %s\n", filename, strerror(errno)); - - /* check file type and read file header */ - magic = fgetc(fp); - if (magic != 'P') - die("%s is not a PNM file\n", filename); - magic = fgetc(fp); - switch (magic) { + FILE *fp; + unsigned int i, j; + int magic; + unsigned int maxval; + + /* open image file */ + fp = fopen(filename, "r"); + if (!fp) + die("Cannot open file %s: %s\n", filename, strerror(errno)); + + /* check file type and read file header */ + magic = fgetc(fp); + if (magic != 'P') + die("%s is not a PNM file\n", filename); + magic = fgetc(fp); + switch (magic) { case '1': case '2': case '3': - /* Plain PBM/PGM/PPM */ - break; + /* Plain PBM/PGM/PPM */ + break; case '4': case '5': case '6': - /* Binary PBM/PGM/PPM */ - die("%s: Binary PNM is not supported\n" + /* Binary PBM/PGM/PPM */ + die("%s: Binary PNM is not supported\n" "Use pnmnoraw(1) to convert it to ASCII PNM\n", filename); default: - die("%s is not a PNM file\n", filename); - } - logo_width = get_number(fp); - logo_height = get_number(fp); - - /* allocate image data */ - logo_data = (struct color **)malloc(logo_height*sizeof(struct color *)); - if (!logo_data) - die("%s\n", strerror(errno)); - for (i = 0; i < logo_height; i++) { - logo_data[i] = malloc(logo_width*sizeof(struct color)); + die("%s is not a PNM file\n", filename); + } + logo_width = get_number(fp); + logo_height = get_number(fp); + + /* allocate image data */ + logo_data = (struct color **)malloc(logo_height*sizeof(struct color *)); + if (!logo_data) + die("%s\n", strerror(errno)); + for (i = 0; i < logo_height; i++) { + logo_data[i] = malloc(logo_width*sizeof(struct color)); if (!logo_data[i]) - die("%s\n", strerror(errno)); - } + die("%s\n", strerror(errno)); + } - /* read image data */ - switch (magic) { + /* read image data */ + switch (magic) { case '1': - /* Plain PBM */ - is_plain_pbm = 1; - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) - logo_data[i][j].red = logo_data[i][j].green = - logo_data[i][j].blue = 255*(1-get_number(fp)); - break; + /* Plain PBM */ + is_plain_pbm = 1; + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) + logo_data[i][j].red = logo_data[i][j].green = + logo_data[i][j].blue = 255*(1-get_number(fp)); + break; case '2': - /* Plain PGM */ - maxval = get_number(fp); - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) - logo_data[i][j].red = logo_data[i][j].green = - logo_data[i][j].blue = get_number255(fp, maxval); - break; + /* Plain PGM */ + maxval = get_number(fp); + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) + logo_data[i][j].red = logo_data[i][j].green = + logo_data[i][j].blue = get_number255(fp, maxval); + break; case '3': - /* Plain PPM */ - maxval = get_number(fp); - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) { - logo_data[i][j].red = get_number255(fp, maxval); - logo_data[i][j].green = get_number255(fp, maxval); - logo_data[i][j].blue = get_number255(fp, maxval); - } - break; - } + /* Plain PPM */ + maxval = get_number(fp); + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) { + logo_data[i][j].red = get_number255(fp, maxval); + logo_data[i][j].green = get_number255(fp, maxval); + logo_data[i][j].blue = get_number255(fp, maxval); + } + break; + } - /* close file */ - fclose(fp); + /* close file */ + fclose(fp); } static inline int is_black(struct color c) { - return c.red == 0 && c.green == 0 && c.blue == 0; + return c.red == 0 && c.green == 0 && c.blue == 0; } static inline int is_white(struct color c) { - return c.red == 255 && c.green == 255 && c.blue == 255; + return c.red == 255 && c.green == 255 && c.blue == 255; } static inline int is_gray(struct color c) { - return c.red == c.green && c.red == c.blue; + return c.red == c.green && c.red == c.blue; } static inline int is_equal(struct color c1, struct color c2) { - return c1.red == c2.red && c1.green == c2.green && c1.blue == c2.blue; + return c1.red == c2.red && c1.green == c2.green && c1.blue == c2.blue; } static void write_header(void) { - /* open logo file */ - if (outputname) { - out = fopen(outputname, "w"); - if (!out) - die("Cannot create file %s: %s\n", outputname, strerror(errno)); - } else { - out = stdout; - } - - fputs("/*\n", out); - fputs(" * DO NOT EDIT THIS FILE!\n", out); - fputs(" *\n", out); - fprintf(out, " * It was automatically generated from %s\n", filename); - fputs(" *\n", out); - fprintf(out, " * Linux logo %s\n", logoname); - fputs(" */\n\n", out); - fputs("#include \n\n", out); - fprintf(out, "static unsigned char %s_data[] __initdata = {\n", - logoname); + /* open logo file */ + if (outputname) { + out = fopen(outputname, "w"); + if (!out) + die("Cannot create file %s: %s\n", outputname, strerror(errno)); + } else { + out = stdout; + } + + fputs("/*\n", out); + fputs(" * DO NOT EDIT THIS FILE!\n", out); + fputs(" *\n", out); + fprintf(out, " * It was automatically generated from %s\n", filename); + fputs(" *\n", out); + fprintf(out, " * Linux logo %s\n", logoname); + fputs(" */\n\n", out); + fputs("#include \n\n", out); + fprintf(out, "static unsigned char %s_data[] __initdata = {\n", + logoname); } static void write_footer(void) { - fputs("\n};\n\n", out); - fprintf(out, "const struct linux_logo %s __initconst = {\n", logoname); - fprintf(out, "\t.type\t\t= %s,\n", logo_types[logo_type]); - fprintf(out, "\t.width\t\t= %d,\n", logo_width); - fprintf(out, "\t.height\t\t= %d,\n", logo_height); - if (logo_type == LINUX_LOGO_CLUT224) { - fprintf(out, "\t.clutsize\t= %d,\n", logo_clutsize); - fprintf(out, "\t.clut\t\t= %s_clut,\n", logoname); - } - fprintf(out, "\t.data\t\t= %s_data\n", logoname); - fputs("};\n\n", out); - - /* close logo file */ - if (outputname) - fclose(out); + fputs("\n};\n\n", out); + fprintf(out, "const struct linux_logo %s __initconst = {\n", logoname); + fprintf(out, "\t.type\t\t= %s,\n", logo_types[logo_type]); + fprintf(out, "\t.width\t\t= %d,\n", logo_width); + fprintf(out, "\t.height\t\t= %d,\n", logo_height); + if (logo_type == LINUX_LOGO_CLUT224) { + fprintf(out, "\t.clutsize\t= %d,\n", logo_clutsize); + fprintf(out, "\t.clut\t\t= %s_clut,\n", logoname); + } + fprintf(out, "\t.data\t\t= %s_data\n", logoname); + fputs("};\n\n", out); + + /* close logo file */ + if (outputname) + fclose(out); } static int write_hex_cnt; static void write_hex(unsigned char byte) { - if (write_hex_cnt % 12) - fprintf(out, ", 0x%02x", byte); - else if (write_hex_cnt) - fprintf(out, ",\n\t0x%02x", byte); - else - fprintf(out, "\t0x%02x", byte); - write_hex_cnt++; + if (write_hex_cnt % 12) + fprintf(out, ", 0x%02x", byte); + else if (write_hex_cnt) + fprintf(out, ",\n\t0x%02x", byte); + else + fprintf(out, "\t0x%02x", byte); + write_hex_cnt++; } static void write_logo_mono(void) { - unsigned int i, j; - unsigned char val, bit; - - /* validate image */ - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) - if (!is_black(logo_data[i][j]) && !is_white(logo_data[i][j])) - die("Image must be monochrome\n"); - - /* write file header */ - write_header(); - - /* write logo data */ - for (i = 0; i < logo_height; i++) { - for (j = 0; j < logo_width;) { - for (val = 0, bit = 0x80; bit && j < logo_width; j++, bit >>= 1) - if (logo_data[i][j].red) - val |= bit; - write_hex(val); + unsigned int i, j; + unsigned char val, bit; + + /* validate image */ + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) + if (!is_black(logo_data[i][j]) && !is_white(logo_data[i][j])) + die("Image must be monochrome\n"); + + /* write file header */ + write_header(); + + /* write logo data */ + for (i = 0; i < logo_height; i++) { + for (j = 0; j < logo_width;) { + for (val = 0, bit = 0x80; bit && j < logo_width; j++, bit >>= 1) + if (logo_data[i][j].red) + val |= bit; + write_hex(val); + } } - } - /* write logo structure and file footer */ - write_footer(); + /* write logo structure and file footer */ + write_footer(); } static void write_logo_vga16(void) { - unsigned int i, j, k; - unsigned char val; - - /* validate image */ - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) { - for (k = 0; k < 16; k++) - if (is_equal(logo_data[i][j], clut_vga16[k])) - break; - if (k == 16) - die("Image must use the 16 console colors only\n" - "Use ppmquant(1) -map clut_vga16.ppm to reduce the number " - "of colors\n"); - } + unsigned int i, j, k; + unsigned char val; - /* write file header */ - write_header(); - - /* write logo data */ - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) { - for (k = 0; k < 16; k++) - if (is_equal(logo_data[i][j], clut_vga16[k])) - break; - val = k<<4; - if (++j < logo_width) { - for (k = 0; k < 16; k++) - if (is_equal(logo_data[i][j], clut_vga16[k])) - break; - val |= k; - } - write_hex(val); - } + /* validate image */ + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) { + for (k = 0; k < 16; k++) + if (is_equal(logo_data[i][j], clut_vga16[k])) + break; + if (k == 16) + die("Image must use the 16 console colors only\n" + "Use ppmquant(1) -map clut_vga16.ppm to reduce the number " + "of colors\n"); + } + + /* write file header */ + write_header(); - /* write logo structure and file footer */ - write_footer(); + /* write logo data */ + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) { + for (k = 0; k < 16; k++) + if (is_equal(logo_data[i][j], clut_vga16[k])) + break; + val = k<<4; + if (++j < logo_width) { + for (k = 0; k < 16; k++) + if (is_equal(logo_data[i][j], clut_vga16[k])) + break; + val |= k; + } + write_hex(val); + } + + /* write logo structure and file footer */ + write_footer(); } static void write_logo_clut224(void) { - unsigned int i, j, k; - - /* validate image */ - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) { - for (k = 0; k < logo_clutsize; k++) - if (is_equal(logo_data[i][j], logo_clut[k])) - break; - if (k == logo_clutsize) { - if (logo_clutsize == MAX_LINUX_LOGO_COLORS) - die("Image has more than %d colors\n" - "Use ppmquant(1) to reduce the number of colors\n", - MAX_LINUX_LOGO_COLORS); - logo_clut[logo_clutsize++] = logo_data[i][j]; - } - } + unsigned int i, j, k; - /* write file header */ - write_header(); + /* validate image */ + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) { + for (k = 0; k < logo_clutsize; k++) + if (is_equal(logo_data[i][j], logo_clut[k])) + break; + if (k == logo_clutsize) { + if (logo_clutsize == MAX_LINUX_LOGO_COLORS) + die("Image has more than %d colors\n" + "Use ppmquant(1) to reduce the number of colors\n", + MAX_LINUX_LOGO_COLORS); + logo_clut[logo_clutsize++] = logo_data[i][j]; + } + } - /* write logo data */ - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) { - for (k = 0; k < logo_clutsize; k++) - if (is_equal(logo_data[i][j], logo_clut[k])) - break; - write_hex(k+32); + /* write file header */ + write_header(); + + /* write logo data */ + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) { + for (k = 0; k < logo_clutsize; k++) + if (is_equal(logo_data[i][j], logo_clut[k])) + break; + write_hex(k+32); + } + fputs("\n};\n\n", out); + + /* write logo clut */ + fprintf(out, "static unsigned char %s_clut[] __initdata = {\n", + logoname); + write_hex_cnt = 0; + for (i = 0; i < logo_clutsize; i++) { + write_hex(logo_clut[i].red); + write_hex(logo_clut[i].green); + write_hex(logo_clut[i].blue); } - fputs("\n};\n\n", out); - - /* write logo clut */ - fprintf(out, "static unsigned char %s_clut[] __initdata = {\n", - logoname); - write_hex_cnt = 0; - for (i = 0; i < logo_clutsize; i++) { - write_hex(logo_clut[i].red); - write_hex(logo_clut[i].green); - write_hex(logo_clut[i].blue); - } - - /* write logo structure and file footer */ - write_footer(); + + /* write logo structure and file footer */ + write_footer(); } static void write_logo_gray256(void) { - unsigned int i, j; + unsigned int i, j; - /* validate image */ - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) - if (!is_gray(logo_data[i][j])) - die("Image must be grayscale\n"); + /* validate image */ + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) + if (!is_gray(logo_data[i][j])) + die("Image must be grayscale\n"); - /* write file header */ - write_header(); + /* write file header */ + write_header(); - /* write logo data */ - for (i = 0; i < logo_height; i++) - for (j = 0; j < logo_width; j++) - write_hex(logo_data[i][j].red); + /* write logo data */ + for (i = 0; i < logo_height; i++) + for (j = 0; j < logo_width; j++) + write_hex(logo_data[i][j].red); - /* write logo structure and file footer */ - write_footer(); + /* write logo structure and file footer */ + write_footer(); } static void die(const char *fmt, ...) { - va_list ap; + va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); - exit(1); + exit(1); } static void usage(void) { - die("\n" + die("\n" "Usage: %s [options] \n" "\n" "Valid options:\n" - " -h : display this usage information\n" - " -n : specify logo name (default: linux_logo)\n" - " -o : output to file instead of stdout\n" - " -t : specify logo type, one of\n" - " mono : monochrome black/white\n" - " vga16 : 16 colors VGA text palette\n" - " clut224 : 224 colors (default)\n" - " gray256 : 256 levels grayscale\n" + " -h : display this usage information\n" + " -n : specify logo name (default: linux_logo)\n" + " -o : output to file instead of stdout\n" + " -t : specify logo type, one of\n" + " mono : monochrome black/white\n" + " vga16 : 16 colors VGA text palette\n" + " clut224 : 224 colors (default)\n" + " gray256 : 256 levels grayscale\n" "\n", programname); } int main(int argc, char *argv[]) { - int opt; + int opt; - programname = argv[0]; + programname = argv[0]; - opterr = 0; - while (1) { - opt = getopt(argc, argv, "hn:o:t:"); - if (opt == -1) - break; + opterr = 0; + while (1) { + opt = getopt(argc, argv, "hn:o:t:"); + if (opt == -1) + break; - switch (opt) { - case 'h': - usage(); - break; + switch (opt) { + case 'h': + usage(); + break; - case 'n': - logoname = optarg; - break; + case 'n': + logoname = optarg; + break; - case 'o': - outputname = optarg; - break; + case 'o': + outputname = optarg; + break; - case 't': - if (!strcmp(optarg, "mono")) - logo_type = LINUX_LOGO_MONO; - else if (!strcmp(optarg, "vga16")) - logo_type = LINUX_LOGO_VGA16; - else if (!strcmp(optarg, "clut224")) - logo_type = LINUX_LOGO_CLUT224; - else if (!strcmp(optarg, "gray256")) - logo_type = LINUX_LOGO_GRAY256; - else - usage(); - break; + case 't': + if (!strcmp(optarg, "mono")) + logo_type = LINUX_LOGO_MONO; + else if (!strcmp(optarg, "vga16")) + logo_type = LINUX_LOGO_VGA16; + else if (!strcmp(optarg, "clut224")) + logo_type = LINUX_LOGO_CLUT224; + else if (!strcmp(optarg, "gray256")) + logo_type = LINUX_LOGO_GRAY256; + else + usage(); + break; - default: - usage(); - break; + default: + usage(); + break; + } } - } - if (optind != argc-1) - usage(); + if (optind != argc-1) + usage(); - filename = argv[optind]; + filename = argv[optind]; - read_image(); - switch (logo_type) { + read_image(); + switch (logo_type) { case LINUX_LOGO_MONO: - write_logo_mono(); - break; + write_logo_mono(); + break; case LINUX_LOGO_VGA16: - write_logo_vga16(); - break; + write_logo_vga16(); + break; case LINUX_LOGO_CLUT224: - write_logo_clut224(); - break; + write_logo_clut224(); + break; case LINUX_LOGO_GRAY256: - write_logo_gray256(); - break; - } - exit(0); + write_logo_gray256(); + break; + } + exit(0); } -- cgit From 8f0e056068f26c0f1121f7f96a6b4515f59160f2 Mon Sep 17 00:00:00 2001 From: Nikita Romanyuk Date: Sat, 25 Feb 2023 10:12:29 +0300 Subject: drivers: video: logo: add SPDX comment, remove GPL notice in pnmtologo.c Signed-off-by: Nikita Romanyuk Reviewed-by: Geert Uytterhoeven Signed-off-by: Helge Deller --- drivers/video/logo/pnmtologo.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/video/logo/pnmtologo.c b/drivers/video/logo/pnmtologo.c index 78cb95f3cfce..ada5ef6e51b7 100644 --- a/drivers/video/logo/pnmtologo.c +++ b/drivers/video/logo/pnmtologo.c @@ -1,14 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Convert a logo in ASCII PNM format to C source suitable for inclusion in * the Linux kernel * * (C) Copyright 2001-2003 by Geert Uytterhoeven - * - * -------------------------------------------------------------------------- - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. */ #include -- cgit From 7f501aa71da9dc2eaae2b0118a151cad018d33b0 Mon Sep 17 00:00:00 2001 From: Lucy Mielke Date: Tue, 7 Feb 2023 11:06:30 +0100 Subject: fbdev: omapfb: cleanup inconsistent indentation This cleans up the indentation according to the Linux kernel coding style, and should fix the warning created by the kernel test robot. Fixes: 8b08cf2b64f5 ("OMAP: add TI OMAP framebuffer driver") Reported-by: kernel test robot Signed-off-by: Lucy Mielke Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index 1f3df2055ff0..18736079843d 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -544,19 +544,25 @@ static int set_fb_var(struct fb_info *fbi, var->yoffset = var->yres_virtual - var->yres; if (plane->color_mode == OMAPFB_COLOR_RGB444) { - var->red.offset = 8; var->red.length = 4; - var->red.msb_right = 0; - var->green.offset = 4; var->green.length = 4; - var->green.msb_right = 0; - var->blue.offset = 0; var->blue.length = 4; - var->blue.msb_right = 0; + var->red.offset = 8; + var->red.length = 4; + var->red.msb_right = 0; + var->green.offset = 4; + var->green.length = 4; + var->green.msb_right = 0; + var->blue.offset = 0; + var->blue.length = 4; + var->blue.msb_right = 0; } else { - var->red.offset = 11; var->red.length = 5; - var->red.msb_right = 0; - var->green.offset = 5; var->green.length = 6; - var->green.msb_right = 0; - var->blue.offset = 0; var->blue.length = 5; - var->blue.msb_right = 0; + var->red.offset = 11; + var->red.length = 5; + var->red.msb_right = 0; + var->green.offset = 5; + var->green.length = 6; + var->green.msb_right = 0; + var->blue.offset = 0; + var->blue.length = 5; + var->blue.msb_right = 0; } var->height = -1; -- cgit From 33bf61c0a1a97e2abff3117751eabad28106a7c5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 8 Mar 2023 08:19:21 +0100 Subject: MAINTAINERS: orphan SIS FRAMEBUFFER DRIVER This was triggered by the fact that the webpage: http://www.winischhofer.net/linuxsisvga.shtml cannot be reached anymore. Thomas Winischhofer is still reachable at the given email address, but he has not been active since 2005. Mark the SIS FRAMEBUFFER DRIVER as orphan to reflect the current state. Signed-off-by: Lukas Bulwahn Acked-by: Thomas Zimmermann Signed-off-by: Helge Deller --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ec57c42ed544..d603402b0810 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19150,9 +19150,7 @@ W: http://www.brownhat.org/sis900.html F: drivers/net/ethernet/sis/sis900.* SIS FRAMEBUFFER DRIVER -M: Thomas Winischhofer -S: Maintained -W: http://www.winischhofer.net/linuxsisvga.shtml +S: Orphan F: Documentation/fb/sisfb.rst F: drivers/video/fbdev/sis/ F: include/video/sisfb.h -- cgit From f90bd245de82c095187d8c2cabb8b488a39eaecc Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Tue, 7 Mar 2023 13:08:56 +0000 Subject: fbdev: tgafb: Fix potential divide by zero fb_set_var would by called when user invokes ioctl with cmd FBIOPUT_VSCREENINFO. User-provided data would finally reach tgafb_check_var. In case var->pixclock is assigned to zero, divide by zero would occur when checking whether reciprocal of var->pixclock is too high. Similar crashes have happened in other fbdev drivers. There is no check and modification on var->pixclock along the call chain to tgafb_check_var. We believe it could also be triggered in driver tgafb from user site. Signed-off-by: Wei Chen Signed-off-by: Helge Deller --- drivers/video/fbdev/tgafb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/tgafb.c b/drivers/video/fbdev/tgafb.c index 14d37c49633c..b44004880f0d 100644 --- a/drivers/video/fbdev/tgafb.c +++ b/drivers/video/fbdev/tgafb.c @@ -173,6 +173,9 @@ tgafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { struct tga_par *par = (struct tga_par *)info->par; + if (!var->pixclock) + return -EINVAL; + if (par->tga_type == TGA_TYPE_8PLANE) { if (var->bits_per_pixel != 8) return -EINVAL; -- cgit From 7eb1220f4bde36a15b26e8f54480406c72081bfa Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 8 Mar 2023 13:49:50 +0800 Subject: fbdev: clps711x-fb: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Signed-off-by: Helge Deller --- drivers/video/fbdev/clps711x-fb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c index 45c75ff01eca..c8bfc608bd9c 100644 --- a/drivers/video/fbdev/clps711x-fb.c +++ b/drivers/video/fbdev/clps711x-fb.c @@ -238,8 +238,7 @@ static int clps711x_fb_probe(struct platform_device *pdev) info->fix.mmio_start = res->start; info->fix.mmio_len = resource_size(res); - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - info->screen_base = devm_ioremap_resource(dev, res); + info->screen_base = devm_platform_get_and_ioremap_resource(pdev, 1, &res); if (IS_ERR(info->screen_base)) { ret = PTR_ERR(info->screen_base); goto out_fb_release; -- cgit From 096dc32bb73d20e0854b3a471379f577f903f0ee Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:30 -0600 Subject: fbdev: Use of_property_read_bool() for boolean properties It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. Convert reading boolean properties to to of_property_read_bool(). Signed-off-by: Rob Herring Signed-off-by: Helge Deller --- drivers/video/fbdev/offb.c | 4 ++-- drivers/video/fbdev/sm501fb.c | 4 ++-- drivers/video/fbdev/tcx.c | 3 +-- drivers/video/fbdev/xilinxfb.c | 3 +-- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index f7ad6bc9d02d..b97d251d894b 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -549,10 +549,10 @@ static void offb_init_nodriver(struct platform_device *parent, struct device_nod int foreign_endian = 0; #ifdef __BIG_ENDIAN - if (of_get_property(dp, "little-endian", NULL)) + if (of_property_read_bool(dp, "little-endian")) foreign_endian = FBINFO_FOREIGN_ENDIAN; #else - if (of_get_property(dp, "big-endian", NULL)) + if (of_property_read_bool(dp, "big-endian")) foreign_endian = FBINFO_FOREIGN_ENDIAN; #endif diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c index f743bfbde2a6..1f3cbe723def 100644 --- a/drivers/video/fbdev/sm501fb.c +++ b/drivers/video/fbdev/sm501fb.c @@ -1737,10 +1737,10 @@ static int sm501fb_init_fb(struct fb_info *fb, enum sm501_controller head, #if defined(CONFIG_OF) #ifdef __BIG_ENDIAN - if (of_get_property(info->dev->parent->of_node, "little-endian", NULL)) + if (of_property_read_bool(info->dev->parent->of_node, "little-endian")) fb->flags |= FBINFO_FOREIGN_ENDIAN; #else - if (of_get_property(info->dev->parent->of_node, "big-endian", NULL)) + if (of_property_read_bool(info->dev->parent->of_node, "big-endian")) fb->flags |= FBINFO_FOREIGN_ENDIAN; #endif #endif diff --git a/drivers/video/fbdev/tcx.c b/drivers/video/fbdev/tcx.c index 01d87f53324d..f2eaf6e7fff6 100644 --- a/drivers/video/fbdev/tcx.c +++ b/drivers/video/fbdev/tcx.c @@ -379,8 +379,7 @@ static int tcx_probe(struct platform_device *op) spin_lock_init(&par->lock); - par->lowdepth = - (of_find_property(dp, "tcx-8-bit", NULL) != NULL); + par->lowdepth = of_property_read_bool(dp, "tcx-8-bit"); sbusfb_fill_var(&info->var, dp, 8); info->var.red.length = 8; diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c index 1ac83900a21c..c17cfffd9a84 100644 --- a/drivers/video/fbdev/xilinxfb.c +++ b/drivers/video/fbdev/xilinxfb.c @@ -469,8 +469,7 @@ static int xilinxfb_of_probe(struct platform_device *pdev) pdata.yvirt = prop[1]; } - if (of_find_property(pdev->dev.of_node, "rotate-display", NULL)) - pdata.rotate_screen = 1; + pdata.rotate_screen = of_property_read_bool(pdev->dev.of_node, "rotate-display"); platform_set_drvdata(pdev, drvdata); return xilinxfb_assign(pdev, drvdata, &pdata); -- cgit From 0db0a1eb444dc9f79241d673ea611741211acdae Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 14 Mar 2023 13:42:17 +0800 Subject: fbdev: pxa3xx-gcu: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Signed-off-by: Helge Deller --- drivers/video/fbdev/pxa3xx-gcu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index c3cd1e1cc01b..d16729215423 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c +++ b/drivers/video/fbdev/pxa3xx-gcu.c @@ -599,8 +599,7 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev) priv->misc_dev.fops = &pxa3xx_gcu_miscdev_fops; /* handle IO resources */ - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->mmio_base = devm_ioremap_resource(dev, r); + priv->mmio_base = devm_platform_get_and_ioremap_resource(pdev, 0, &r); if (IS_ERR(priv->mmio_base)) return PTR_ERR(priv->mmio_base); -- cgit From be66c2cbc05ed84821f3dde6439044ab44506525 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 14 Mar 2023 13:42:18 +0800 Subject: fbdev: wm8505fb: Use devm_platform_ioremap_resource() According to commit 7945f929f1a7 ("drivers: provide devm_platform_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to Use devm_platform_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Signed-off-by: Helge Deller --- drivers/video/fbdev/wm8505fb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/video/fbdev/wm8505fb.c b/drivers/video/fbdev/wm8505fb.c index 8f4d674fa0d0..96a6f7623e19 100644 --- a/drivers/video/fbdev/wm8505fb.c +++ b/drivers/video/fbdev/wm8505fb.c @@ -261,7 +261,6 @@ static const struct fb_ops wm8505fb_ops = { static int wm8505fb_probe(struct platform_device *pdev) { struct wm8505fb_info *fbi; - struct resource *res; struct display_timings *disp_timing; void *addr; int ret; @@ -299,8 +298,7 @@ static int wm8505fb_probe(struct platform_device *pdev) addr = addr + sizeof(struct wm8505fb_info); fbi->fb.pseudo_palette = addr; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - fbi->regbase = devm_ioremap_resource(&pdev->dev, res); + fbi->regbase = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(fbi->regbase)) return PTR_ERR(fbi->regbase); -- cgit From 4c7e8e05bca577380d03a2731a14b3cb5652b4f3 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 14 Mar 2023 13:42:19 +0800 Subject: fbdev: xilinxfb: Use devm_platform_get_and_ioremap_resource() According to commit 890cc39a8799 ("drivers: provide devm_platform_get_and_ioremap_resource()"), convert platform_get_resource(), devm_ioremap_resource() to a single call to devm_platform_get_and_ioremap_resource(), as this is exactly what this function does. Signed-off-by: Yang Li Signed-off-by: Helge Deller --- drivers/video/fbdev/xilinxfb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c index c17cfffd9a84..7911354827dc 100644 --- a/drivers/video/fbdev/xilinxfb.c +++ b/drivers/video/fbdev/xilinxfb.c @@ -273,8 +273,7 @@ static int xilinxfb_assign(struct platform_device *pdev, if (drvdata->flags & BUS_ACCESS_FLAG) { struct resource *res; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - drvdata->regs = devm_ioremap_resource(&pdev->dev, res); + drvdata->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(drvdata->regs)) return PTR_ERR(drvdata->regs); -- cgit From e140980ef211d537833500377ae411c3f232b41d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2023 17:27:10 +0100 Subject: fbdev: omapfb: remove omap1 osk driver Commit 21a3e6eed423 ("ARM: omap1: remove osk-mistral add-on board support") removed the platform_device definition for the "lcd_osk" device, so this driver is now unused and can be removed as well. Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/Makefile | 1 - drivers/video/fbdev/omap/lcd_osk.c | 86 -------------------------------------- 2 files changed, 87 deletions(-) delete mode 100644 drivers/video/fbdev/omap/lcd_osk.c diff --git a/drivers/video/fbdev/omap/Makefile b/drivers/video/fbdev/omap/Makefile index 504edb9c09dd..6d5082c76919 100644 --- a/drivers/video/fbdev/omap/Makefile +++ b/drivers/video/fbdev/omap/Makefile @@ -18,7 +18,6 @@ objs-y$(CONFIG_FB_OMAP_LCDC_HWA742) += hwa742.o lcds-y$(CONFIG_MACH_AMS_DELTA) += lcd_ams_delta.o lcds-y$(CONFIG_MACH_OMAP_PALMTE) += lcd_palmte.o -lcds-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o lcds-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o diff --git a/drivers/video/fbdev/omap/lcd_osk.c b/drivers/video/fbdev/omap/lcd_osk.c deleted file mode 100644 index 8168ba0d47fd..000000000000 --- a/drivers/video/fbdev/omap/lcd_osk.c +++ /dev/null @@ -1,86 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * LCD panel support for the TI OMAP OSK board - * - * Copyright (C) 2004 Nokia Corporation - * Author: Imre Deak - * Adapted for OSK by - */ - -#include -#include -#include - -#include -#include - -#include "omapfb.h" - -static int osk_panel_enable(struct lcd_panel *panel) -{ - /* configure PWL pin */ - omap_cfg_reg(PWL); - - /* Enable PWL unit */ - omap_writeb(0x01, OMAP_PWL_CLK_ENABLE); - - /* Set PWL level */ - omap_writeb(0xFF, OMAP_PWL_ENABLE); - - /* set GPIO2 high (lcd power enabled) */ - gpio_set_value(2, 1); - - return 0; -} - -static void osk_panel_disable(struct lcd_panel *panel) -{ - /* Set PWL level to zero */ - omap_writeb(0x00, OMAP_PWL_ENABLE); - - /* Disable PWL unit */ - omap_writeb(0x00, OMAP_PWL_CLK_ENABLE); - - /* set GPIO2 low */ - gpio_set_value(2, 0); -} - -static struct lcd_panel osk_panel = { - .name = "osk", - .config = OMAP_LCDC_PANEL_TFT, - - .bpp = 16, - .data_lines = 16, - .x_res = 240, - .y_res = 320, - .pixel_clock = 12500, - .hsw = 40, - .hfp = 40, - .hbp = 72, - .vsw = 1, - .vfp = 1, - .vbp = 0, - .pcd = 12, - - .enable = osk_panel_enable, - .disable = osk_panel_disable, -}; - -static int osk_panel_probe(struct platform_device *pdev) -{ - omapfb_register_panel(&osk_panel); - return 0; -} - -static struct platform_driver osk_panel_driver = { - .probe = osk_panel_probe, - .driver = { - .name = "lcd_osk", - }, -}; - -module_platform_driver(osk_panel_driver); - -MODULE_AUTHOR("Imre Deak"); -MODULE_DESCRIPTION("LCD panel support for the TI OMAP OSK board"); -MODULE_LICENSE("GPL"); -- cgit From 6fa7f537351c8fad0e43e9279efe76dbc942bea0 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Mon, 13 Mar 2023 15:26:52 -0700 Subject: pm-graph: sleepgraph: Avoid crashing on binary data in device names A regression has occurred in the hid-sensor code where a device name string has not been initialized to 0, and ends up without a NULL char and is printed with %s. This includes random binary data in the device name, which makes its way into the ftrace output and ends up crashing sleepgraph because it expects the ftrace output to be ASCII only. For example: "HID-SENSOR-INT-020b?.39.auto" ends up in ftrace instead of "HID-SENSOR-INT-020b.39.auto". It causes this crash in sleepgraph: File "/usr/bin/sleepgraph", line 5579, in executeSuspend for line in fp: File "/usr/lib/python3.10/codecs.py", line 322, in decode (result, consumed) = self._buffer_decode(data, self.errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 1568: invalid start byte The issue is present in 6.3-rc1 and is described in full here: https://bugzilla.kernel.org/show_bug.cgi?id=217169 A separate fix has been submitted to have this issue repaired, but it has also exposed a larger bug in sleepgraph, since nothing should make sleepgraph crash. Sleepgraph needs to be able to handle binary data showing up in ftrace gracefully. Modify the ftrace processing code to treat it as potentially binary and to filter out binary data and leave just the ASCII. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217169 Fixes: 98c062e82451 ("HID: hid-sensor-custom: Allow more custom iio sensors") Signed-off-by: Todd Brandt [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/pm-graph/sleepgraph.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 82c09cd25cc2..bf4ac24a1c7a 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -5556,9 +5556,8 @@ def executeSuspend(quiet=False): if not quiet: pprint('CAPTURING TRACE') op = sv.writeDatafileHeader(sv.ftracefile, testdata) - fp = open(tp+'trace', 'r') - for line in fp: - op.write(line) + fp = open(tp+'trace', 'rb') + op.write(ascii(fp.read())) op.close() sv.fsetVal('', 'trace') sv.platforminfo(cmdafter) -- cgit From 7ff84910c66c9144cc0de9d9deed9fb84c03aff0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Mar 2023 06:20:58 -0400 Subject: lockd: set file_lock start and end when decoding nlm4 testargs Commit 6930bcbfb6ce dropped the setting of the file_lock range when decoding a nlm_lock off the wire. This causes the client side grant callback to miss matching blocks and reject the lock, only to rerequest it 30s later. Add a helper function to set the file_lock range from the start and end values that the protocol uses, and have the nlm_lock decoder call that to set up the file_lock args properly. Fixes: 6930bcbfb6ce ("lockd: detect and reject lock arguments that overflow") Reported-by: Amir Goldstein Signed-off-by: Jeff Layton Tested-by: Amir Goldstein Cc: stable@vger.kernel.org #6.0 Signed-off-by: Anna Schumaker --- fs/lockd/clnt4xdr.c | 9 +-------- fs/lockd/xdr4.c | 13 ++++++++++++- include/linux/lockd/xdr4.h | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 7df6324ccb8a..8161667c976f 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -261,7 +261,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) u32 exclusive; int error; __be32 *p; - s32 end; memset(lock, 0, sizeof(*lock)); locks_init_lock(fl); @@ -285,13 +284,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK; p = xdr_decode_hyper(p, &l_offset); xdr_decode_hyper(p, &l_len); - end = l_offset + l_len - 1; - - fl->fl_start = (loff_t)l_offset; - if (l_len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = (loff_t)end; + nlm4svc_set_file_lock_range(fl, l_offset, l_len); error = 0; out: return error; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 712fdfeb8ef0..5fcbf30cd275 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -33,6 +33,17 @@ loff_t_to_s64(loff_t offset) return res; } +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len) +{ + s64 end = off + len - 1; + + fl->fl_start = off; + if (len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = end; +} + /* * NLM file handles are defined by specification to be a variable-length * XDR opaque no longer than 1024 bytes. However, this implementation @@ -80,7 +91,7 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; - + nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len); return true; } diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 9a6b55da8fd6..72831e35dca3 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,6 +22,7 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -- cgit From 21fd9e8700de86d1169f6336e97d7a74916ed04a Mon Sep 17 00:00:00 2001 From: Chengen Du Date: Wed, 8 Mar 2023 16:03:27 +0800 Subject: NFS: Correct timing for assigning access cache timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user's login time is newer than the cache's timestamp, the original entry in the RB-tree will be replaced by a new entry. Currently, the timestamp is only set if the entry is not found in the RB-tree, which can cause the timestamp to be undefined when the entry exists. This may result in a significant increase in ACCESS operations if the timestamp is set to zero. Signed-off-by: Chengen Du Fixes: 0eb43812c027 ("NFS: Clear the file access cache upon login”) Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a41c3ee4549c..6fbcbb8d6587 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -3089,7 +3089,6 @@ static void nfs_access_add_rbtree(struct inode *inode, else goto found; } - set->timestamp = ktime_get_ns(); rb_link_node(&set->rb_node, parent, p); rb_insert_color(&set->rb_node, root_node); list_add_tail(&set->lru, &nfsi->access_cache_entry_lru); @@ -3114,6 +3113,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set, cache->fsgid = cred->fsgid; cache->group_info = get_group_info(cred->group_info); cache->mask = set->mask; + cache->timestamp = ktime_get_ns(); /* The above field assignments must be visible * before this item appears on the lru. We cannot easily -- cgit From 91d7b60a65d9f71230ea09b86d2058a884a3c2af Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 8 Mar 2023 11:26:32 +0000 Subject: ACPI: PPTT: Fix to avoid sleep in the atomic context when PPTT is absent Commit 0c80f9e165f8 ("ACPI: PPTT: Leave the table mapped for the runtime usage") enabled to map PPTT once on the first invocation of acpi_get_pptt() and never unmapped the same allowing it to be used at runtime with out the hassle of mapping and unmapping the table. This was needed to fetch LLC information from the PPTT in the cpuhotplug path which is executed in the atomic context as the acpi_get_table() might sleep waiting for a mutex. However it missed to handle the case when there is no PPTT on the system which results in acpi_get_pptt() being called from all the secondary CPUs attempting to fetch the LLC information in the atomic context without knowing the absence of PPTT resulting in the splat like below: | BUG: sleeping function called from invalid context at kernel/locking/semaphore.c:164 | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 | preempt_count: 1, expected: 0 | RCU nest depth: 0, expected: 0 | no locks held by swapper/1/0. | irq event stamp: 0 | hardirqs last enabled at (0): 0x0 | hardirqs last disabled at (0): copy_process+0x61c/0x1b40 | softirqs last enabled at (0): copy_process+0x61c/0x1b40 | softirqs last disabled at (0): 0x0 | CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.3.0-rc1 #1 | Call trace: | dump_backtrace+0xac/0x138 | show_stack+0x30/0x48 | dump_stack_lvl+0x60/0xb0 | dump_stack+0x18/0x28 | __might_resched+0x160/0x270 | __might_sleep+0x58/0xb0 | down_timeout+0x34/0x98 | acpi_os_wait_semaphore+0x7c/0xc0 | acpi_ut_acquire_mutex+0x58/0x108 | acpi_get_table+0x40/0xe8 | acpi_get_pptt+0x48/0xa0 | acpi_get_cache_info+0x38/0x140 | init_cache_level+0xf4/0x118 | detect_cache_attributes+0x2e4/0x640 | update_siblings_masks+0x3c/0x330 | store_cpu_topology+0x88/0xf0 | secondary_start_kernel+0xd0/0x168 | __secondary_switched+0xb8/0xc0 Update acpi_get_pptt() to consider the fact that PPTT is once checked and is not available on the system and return NULL avoiding any attempts to fetch PPTT and thereby avoiding any possible sleep waiting for a mutex in the atomic context. Fixes: 0c80f9e165f8 ("ACPI: PPTT: Leave the table mapped for the runtime usage") Reported-by: Aishwarya TCV Signed-off-by: Sudeep Holla Tested-by: Pierre Gondois Cc: 6.0+ # 6.0+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pptt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 10975bb603fb..a35dd0e41c27 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -536,16 +536,19 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table, static struct acpi_table_header *acpi_get_pptt(void) { static struct acpi_table_header *pptt; + static bool is_pptt_checked; acpi_status status; /* * PPTT will be used at runtime on every CPU hotplug in path, so we * don't need to call acpi_put_table() to release the table mapping. */ - if (!pptt) { + if (!pptt && !is_pptt_checked) { status = acpi_get_table(ACPI_SIG_PPTT, 0, &pptt); if (ACPI_FAILURE(status)) acpi_pptt_warn_missing(); + + is_pptt_checked = true; } return pptt; -- cgit From 0bc23d8b2237a104d7f8379d687aa4cb82e2968b Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 8 Mar 2023 21:23:09 +0800 Subject: ACPI: tools: pfrut: Check if the input of level and type is in the right numeric range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user provides arbitrary non-numeic value to level and type, which could bring unexpected behavior. In this case the expected behavior would be to throw an error. pfrut -h usage: pfrut [OPTIONS] code injection: -l, --load -s, --stage -a, --activate -u, --update [stage and activate] -q, --query -d, --revid update telemetry: -G, --getloginfo -T, --type(0:execution, 1:history) -L, --level(0, 1, 2, 4) -R, --read -D, --revid log pfrut -T A pfrut -G log_level:0 log_type:0 log_revid:2 max_data_size:65536 chunk1_size:0 chunk2_size:1530 rollover_cnt:0 reset_cnt:17 Fix this by restricting the input to be in the expected range. Reported-by: Hariganesh Govindarajulu Suggested-by: "Rafael J. Wysocki" Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/tools/pfrut/pfrut.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c index 52aa0351533c..388c9e3ad040 100644 --- a/tools/power/acpi/tools/pfrut/pfrut.c +++ b/tools/power/acpi/tools/pfrut/pfrut.c @@ -97,7 +97,7 @@ static struct option long_options[] = { static void parse_options(int argc, char **argv) { int option_index = 0; - char *pathname; + char *pathname, *endptr; int opt; pathname = strdup(argv[0]); @@ -125,11 +125,23 @@ static void parse_options(int argc, char **argv) log_getinfo = 1; break; case 'T': - log_type = atoi(optarg); + log_type = strtol(optarg, &endptr, 0); + if (*endptr || (log_type != 0 && log_type != 1)) { + printf("Number expected: type(0:execution, 1:history) - Quit.\n"); + exit(1); + } + set_log_type = 1; break; case 'L': - log_level = atoi(optarg); + log_level = strtol(optarg, &endptr, 0); + if (*endptr || + (log_level != 0 && log_level != 1 && + log_level != 2 && log_level != 4)) { + printf("Number expected: level(0, 1, 2, 4) - Quit.\n"); + exit(1); + } + set_log_level = 1; break; case 'R': -- cgit From c2679254b9c9980d9045f0f722cf093a2b1f7590 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 10 Mar 2023 17:28:56 -0500 Subject: tracing: Make tracepoint lockdep check actually test something A while ago where the trace events had the following: rcu_read_lock_sched_notrace(); rcu_dereference_sched(...); rcu_read_unlock_sched_notrace(); If the tracepoint is enabled, it could trigger RCU issues if called in the wrong place. And this warning was only triggered if lockdep was enabled. If the tracepoint was never enabled with lockdep, the bug would not be caught. To handle this, the above sequence was done when lockdep was enabled regardless if the tracepoint was enabled or not (although the always enabled code really didn't do anything, it would still trigger a warning). But a lot has changed since that lockdep code was added. One is, that sequence no longer triggers any warning. Another is, the tracepoint when enabled doesn't even do that sequence anymore. The main check we care about today is whether RCU is "watching" or not. So if lockdep is enabled, always check if rcu_is_watching() which will trigger a warning if it is not (tracepoints require RCU to be watching). Note, that old sequence did add a bit of overhead when lockdep was enabled, and with the latest kernel updates, would cause the system to slow down enough to trigger kernel "stalled" warnings. Link: http://lore.kernel.org/lkml/20140806181801.GA4605@redhat.com Link: http://lore.kernel.org/lkml/20140807175204.C257CAC5@viggo.jf.intel.com Link: https://lore.kernel.org/lkml/20230307184645.521db5c9@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20230310172856.77406446@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Dave Hansen Cc: "Paul E. McKenney" Cc: Mathieu Desnoyers Cc: Joel Fernandes Acked-by: Peter Zijlstra (Intel) Acked-by: Paul E. McKenney Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU") Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index fa1004fcf810..2083f2d2f05b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -231,12 +231,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. * - * When lockdep is enabled, we make sure to always do the RCU portions of - * the tracepoint code, regardless of whether tracing is on. However, - * don't check if the condition is false, due to interaction with idle - * instrumentation. This lets us find RCU issues triggered with tracepoints - * even when this tracepoint is off. This code has no purpose other than - * poking RCU a bit. + * When lockdep is enabled, we make sure to always test if RCU is + * "watching" regardless if the tracepoint is enabled or not. Tracepoints + * require RCU to be active, and it should always warn at the tracepoint + * site if it is not watching, as it will need to be active when the + * tracepoint is enabled. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ extern int __traceiter_##name(data_proto); \ @@ -249,9 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) TP_ARGS(args), \ TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - rcu_read_lock_sched_notrace(); \ - rcu_dereference_sched(__tracepoint_##name.funcs);\ - rcu_read_unlock_sched_notrace(); \ + WARN_ON_ONCE(!rcu_is_watching()); \ } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ -- cgit From 211baef0eabf4169ce4f73ebd917749d1a7edd74 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Mon, 13 Mar 2023 16:09:54 +0100 Subject: cifs: Fix smb2_set_path_size() If cifs_get_writable_path() finds a writable file, smb2_compound_op() must use that file's FID and not the COMPOUND_FID. Cc: stable@vger.kernel.org Signed-off-by: Volker Lendecke Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 9b956294e864..8dd3791b5c53 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -234,15 +234,32 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, size[0] = 8; /* sizeof __le64 */ data[0] = ptr; - rc = SMB2_set_info_init(tcon, server, - &rqst[num_rqst], COMPOUND_FID, - COMPOUND_FID, current->tgid, - FILE_END_OF_FILE_INFORMATION, - SMB2_O_INFO_FILE, 0, data, size); + if (cfile) { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + current->tgid, + FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + } else { + rc = SMB2_set_info_init(tcon, server, + &rqst[num_rqst], + COMPOUND_FID, + COMPOUND_FID, + current->tgid, + FILE_END_OF_FILE_INFORMATION, + SMB2_O_INFO_FILE, 0, + data, size); + if (!rc) { + smb2_set_next_command(tcon, &rqst[num_rqst]); + smb2_set_related(&rqst[num_rqst]); + } + } if (rc) goto finished; - smb2_set_next_command(tcon, &rqst[num_rqst]); - smb2_set_related(&rqst[num_rqst++]); + num_rqst++; trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path); break; case SMB2_OP_SET_INFO: -- cgit From 05ce0448c3f36febd8db0ee0e9e16557f3ab5ee8 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 10 Mar 2023 15:32:01 +0000 Subject: cifs: generate signkey for the channel that's reconnecting Before my changes to how multichannel reconnects work, the primary channel was always used to do a non-binding session setup. With my changes, that is not the case anymore. Missed this place where channel at index 0 was forcibly updated with the signing key. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/smb2transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 381babc1212c..d827b7547ffa 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -425,7 +425,7 @@ generate_smb3signingkey(struct cifs_ses *ses, /* safe to access primary channel, since it will never go away */ spin_lock(&ses->chan_lock); - memcpy(ses->chans[0].signkey, ses->smb3signingkey, + memcpy(ses->chans[chan_index].signkey, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); spin_unlock(&ses->chan_lock); -- cgit From f959325e6ac3f499450088b8d9c626d1177be160 Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Fri, 10 Mar 2023 11:33:25 -0800 Subject: fsverity: Remove WQ_UNBOUND from fsverity read workqueue WQ_UNBOUND causes significant scheduler latency on ARM64/Android. This is problematic for latency sensitive workloads, like I/O post-processing. Removing WQ_UNBOUND gives a 96% reduction in fsverity workqueue related scheduler latency and improves app cold startup times by ~30ms. WQ_UNBOUND was also removed from the dm-verity workqueue for the same reason [1]. This code was tested by running Android app startup benchmarks and measuring how long the fsverity workqueue spent in the runnable state. Before Total workqueue scheduler latency: 553800us After Total workqueue scheduler latency: 18962us [1]: https://lore.kernel.org/all/20230202012348.885402-1-nhuck@google.com/ Signed-off-by: Nathan Huckleberry Fixes: 8a1d0f9cacc9 ("fs-verity: add data verification hooks for ->readpages()") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230310193325.620493-1-nhuck@google.com Signed-off-by: Eric Biggers --- fs/verity/verify.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/verity/verify.c b/fs/verity/verify.c index f50e3b5b52c9..e2508222750b 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -387,15 +387,15 @@ EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work); int __init fsverity_init_workqueue(void) { /* - * Use an unbound workqueue to allow bios to be verified in parallel - * even when they happen to complete on the same CPU. This sacrifices - * locality, but it's worthwhile since hashing is CPU-intensive. + * Use a high-priority workqueue to prioritize verification work, which + * blocks reads from completing, over regular application tasks. * - * Also use a high-priority workqueue to prioritize verification work, - * which blocks reads from completing, over regular application tasks. + * For performance reasons, don't use an unbound workqueue. Using an + * unbound workqueue for crypto operations causes excessive scheduler + * latency on ARM64. */ fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue", - WQ_UNBOUND | WQ_HIGHPRI, + WQ_HIGHPRI, num_online_cpus()); if (!fsverity_read_workqueue) return -ENOMEM; -- cgit From 9b0cb770f5d7b1ff40bea7ca385438ee94570eec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Mar 2023 11:21:54 -0700 Subject: loop: Fix use-after-free issues do_req_filebacked() calls blk_mq_complete_request() synchronously or asynchronously when using asynchronous I/O unless memory allocation fails. Hence, modify loop_handle_cmd() such that it does not dereference 'cmd' nor 'rq' after do_req_filebacked() finished unless we are sure that the request has not yet been completed. This patch fixes the following kernel crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000054 Call trace: css_put.42938+0x1c/0x1ac loop_process_work+0xc8c/0xfd4 loop_rootcg_workfn+0x24/0x34 process_one_work+0x244/0x558 worker_thread+0x400/0x8fc kthread+0x16c/0x1e0 ret_from_fork+0x10/0x20 Cc: Christoph Hellwig Cc: Ming Lei Cc: Jan Kara Cc: Johannes Weiner Cc: Dan Schatzberg Fixes: c74d40e8b5e2 ("loop: charge i/o to mem and blk cg") Fixes: bc07c10a3603 ("block: loop: support DIO & AIO") Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230314182155.80625-1-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/loop.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 839373451c2b..28eb59fd71ca 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1859,35 +1859,44 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, static void loop_handle_cmd(struct loop_cmd *cmd) { + struct cgroup_subsys_state *cmd_blkcg_css = cmd->blkcg_css; + struct cgroup_subsys_state *cmd_memcg_css = cmd->memcg_css; struct request *rq = blk_mq_rq_from_pdu(cmd); const bool write = op_is_write(req_op(rq)); struct loop_device *lo = rq->q->queuedata; int ret = 0; struct mem_cgroup *old_memcg = NULL; + const bool use_aio = cmd->use_aio; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { ret = -EIO; goto failed; } - if (cmd->blkcg_css) - kthread_associate_blkcg(cmd->blkcg_css); - if (cmd->memcg_css) + if (cmd_blkcg_css) + kthread_associate_blkcg(cmd_blkcg_css); + if (cmd_memcg_css) old_memcg = set_active_memcg( - mem_cgroup_from_css(cmd->memcg_css)); + mem_cgroup_from_css(cmd_memcg_css)); + /* + * do_req_filebacked() may call blk_mq_complete_request() synchronously + * or asynchronously if using aio. Hence, do not touch 'cmd' after + * do_req_filebacked() has returned unless we are sure that 'cmd' has + * not yet been completed. + */ ret = do_req_filebacked(lo, rq); - if (cmd->blkcg_css) + if (cmd_blkcg_css) kthread_associate_blkcg(NULL); - if (cmd->memcg_css) { + if (cmd_memcg_css) { set_active_memcg(old_memcg); - css_put(cmd->memcg_css); + css_put(cmd_memcg_css); } failed: /* complete non-aio request */ - if (!cmd->use_aio || ret) { + if (!use_aio || ret) { if (ret == -EOPNOTSUPP) cmd->ret = ret; else -- cgit From 00e885efcfbb8712d3e1bfc1ae30639c15ca1d3b Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Fri, 10 Mar 2023 09:09:13 +0800 Subject: blk-mq: fix "bad unlock balance detected" on q->srcu in __blk_mq_run_dispatch_ops The 'q' parameter of the macro __blk_mq_run_dispatch_ops may not be one local variable, such as, it is rq->q, then request queue pointed by this variable could be changed to another queue in case of BLK_MQ_F_TAG_QUEUE_SHARED after 'dispatch_ops' returns, then 'bad unlock balance' is triggered. Fixes the issue by adding one local variable for doing srcu lock/unlock. Fixes: 2a904d00855f ("blk-mq: remove hctx_lock and hctx_unlock") Cc: Marco Patalano Signed-off-by: Chris Leech Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230310010913.1014789-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index ef59fee62780..a7482d2cc82e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -378,12 +378,13 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \ do { \ if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \ + struct blk_mq_tag_set *__tag_set = (q)->tag_set; \ int srcu_idx; \ \ might_sleep_if(check_sleep); \ - srcu_idx = srcu_read_lock((q)->tag_set->srcu); \ + srcu_idx = srcu_read_lock(__tag_set->srcu); \ (dispatch_ops); \ - srcu_read_unlock((q)->tag_set->srcu, srcu_idx); \ + srcu_read_unlock(__tag_set->srcu, srcu_idx); \ } else { \ rcu_read_lock(); \ (dispatch_ops); \ -- cgit From dc472c7612297ffc9aea655bf6e9538bec5bfedf Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Sat, 11 Mar 2023 20:25:38 +0100 Subject: ata: pata_parport: fix parport release without claim When adapter is not found, pi->disconnect() is called without previous pi->connect(). This results in error like this: parport0: pata_parport tried to release parport when not owner Add missing out_disconnect label and use it correctly. Signed-off-by: Ondrej Zary Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/pata_parport.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c index 294a266a0dda..31c9677a45e3 100644 --- a/drivers/ata/pata_parport/pata_parport.c +++ b/drivers/ata/pata_parport/pata_parport.c @@ -487,12 +487,13 @@ static struct pi_adapter *pi_init_one(struct parport *parport, pi_connect(pi); if (ata_host_activate(host, 0, NULL, 0, &pata_parport_sht)) - goto out_unreg_parport; + goto out_disconnect; return pi; -out_unreg_parport: +out_disconnect: pi_disconnect(pi); +out_unreg_parport: parport_unregister_device(pi->pardev); if (pi->proto->release_proto) pi->proto->release_proto(pi); -- cgit From b56bce502f55505a97e381d546ee881928183126 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 14 Mar 2023 20:32:53 -0300 Subject: cifs: set DFS root session in cifs_get_smb_ses() Set the DFS root session pointer earlier when creating a new SMB session to prevent racing with smb2_reconnect(), cifs_reconnect_tcon() and DFS cache refresher. Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 1 + fs/cifs/cifsglob.h | 1 - fs/cifs/connect.c | 1 + fs/cifs/dfs.c | 19 ++++++++----------- fs/cifs/dfs.h | 3 ++- fs/cifs/fs_context.h | 1 + 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 2b1a8d55b4ec..cb40074feb3e 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -179,6 +179,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct path *path) tmp.source = full_path; tmp.leaf_fullpath = NULL; tmp.UNC = tmp.prepath = NULL; + tmp.dfs_root_ses = NULL; rc = smb3_fs_context_dup(ctx, &tmp); if (rc) { diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a99883f16d94..1a8190f71c24 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1749,7 +1749,6 @@ struct cifs_mount_ctx { struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct cifs_ses *root_ses; uuid_t mount_id; char *origin_fullpath, *leaf_fullpath; }; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5233f14f0636..b96375f137fa 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2229,6 +2229,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) * need to lock before changing something in the session. */ spin_lock(&cifs_tcp_ses_lock); + ses->dfs_root_ses = ctx->dfs_root_ses; list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index b64d20374b9c..6505f1b20147 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -95,25 +95,22 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) ctx->leaf_fullpath = (char *)full_path; rc = cifs_mount_get_session(mnt_ctx); ctx->leaf_fullpath = NULL; - if (!rc) { - struct cifs_ses *ses = mnt_ctx->ses; - mutex_lock(&ses->session_mutex); - ses->dfs_root_ses = mnt_ctx->root_ses; - mutex_unlock(&ses->session_mutex); - } return rc; } static void set_root_ses(struct cifs_mount_ctx *mnt_ctx) { - if (mnt_ctx->ses) { + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct cifs_ses *ses = mnt_ctx->ses; + + if (ses) { spin_lock(&cifs_tcp_ses_lock); - mnt_ctx->ses->ses_count++; + ses->ses_count++; spin_unlock(&cifs_tcp_ses_lock); - dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses); + dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, ses); } - mnt_ctx->root_ses = mnt_ctx->ses; + ctx->dfs_root_ses = mnt_ctx->ses; } static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path, @@ -260,7 +257,7 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) rc = get_session(mnt_ctx, NULL); if (rc) return rc; - mnt_ctx->root_ses = mnt_ctx->ses; + ctx->dfs_root_ses = mnt_ctx->ses; /* * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally * try to get an DFS referral (even cached) to determine whether it is an DFS mount. diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h index 344bea6d8bab..baf16df55d7e 100644 --- a/fs/cifs/dfs.h +++ b/fs/cifs/dfs.h @@ -22,9 +22,10 @@ static inline char *dfs_get_path(struct cifs_sb_info *cifs_sb, const char *path) static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *path, struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tl) { + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - return dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, + return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls, cifs_remap(cifs_sb), path, ref, tl); } diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 44cb5639ed3b..1b8d4e27f831 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -265,6 +265,7 @@ struct smb3_fs_context { bool rootfs:1; /* if it's a SMB root file system */ bool witness:1; /* use witness protocol */ char *leaf_fullpath; + struct cifs_ses *dfs_root_ses; }; extern const struct fs_parameter_spec smb3_fs_parameters[]; -- cgit From 396935de145589c8bfe552fa03a5e38604071829 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 14 Mar 2023 20:32:54 -0300 Subject: cifs: fix use-after-free bug in refresh_cache_worker() The UAF bug occurred because we were putting DFS root sessions in cifs_umount() while DFS cache refresher was being executed. Make DFS root sessions have same lifetime as DFS tcons so we can avoid the use-after-free bug is DFS cache refresher and other places that require IPCs to get new DFS referrals on. Also, get rid of mount group handling in DFS cache as we no longer need it. This fixes below use-after-free bug catched by KASAN [ 379.946955] BUG: KASAN: use-after-free in __refresh_tcon.isra.0+0x10b/0xc10 [cifs] [ 379.947642] Read of size 8 at addr ffff888018f57030 by task kworker/u4:3/56 [ 379.948096] [ 379.948208] CPU: 0 PID: 56 Comm: kworker/u4:3 Not tainted 6.2.0-rc7-lku #23 [ 379.948661] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014 [ 379.949368] Workqueue: cifs-dfscache refresh_cache_worker [cifs] [ 379.949942] Call Trace: [ 379.950113] [ 379.950260] dump_stack_lvl+0x50/0x67 [ 379.950510] print_report+0x16a/0x48e [ 379.950759] ? __virt_addr_valid+0xd8/0x160 [ 379.951040] ? __phys_addr+0x41/0x80 [ 379.951285] kasan_report+0xdb/0x110 [ 379.951533] ? __refresh_tcon.isra.0+0x10b/0xc10 [cifs] [ 379.952056] ? __refresh_tcon.isra.0+0x10b/0xc10 [cifs] [ 379.952585] __refresh_tcon.isra.0+0x10b/0xc10 [cifs] [ 379.953096] ? __pfx___refresh_tcon.isra.0+0x10/0x10 [cifs] [ 379.953637] ? __pfx___mutex_lock+0x10/0x10 [ 379.953915] ? lock_release+0xb6/0x720 [ 379.954167] ? __pfx_lock_acquire+0x10/0x10 [ 379.954443] ? refresh_cache_worker+0x34e/0x6d0 [cifs] [ 379.954960] ? __pfx_wb_workfn+0x10/0x10 [ 379.955239] refresh_cache_worker+0x4ad/0x6d0 [cifs] [ 379.955755] ? __pfx_refresh_cache_worker+0x10/0x10 [cifs] [ 379.956323] ? __pfx_lock_acquired+0x10/0x10 [ 379.956615] ? read_word_at_a_time+0xe/0x20 [ 379.956898] ? lockdep_hardirqs_on_prepare+0x12/0x220 [ 379.957235] process_one_work+0x535/0x990 [ 379.957509] ? __pfx_process_one_work+0x10/0x10 [ 379.957812] ? lock_acquired+0xb7/0x5f0 [ 379.958069] ? __list_add_valid+0x37/0xd0 [ 379.958341] ? __list_add_valid+0x37/0xd0 [ 379.958611] worker_thread+0x8e/0x630 [ 379.958861] ? __pfx_worker_thread+0x10/0x10 [ 379.959148] kthread+0x17d/0x1b0 [ 379.959369] ? __pfx_kthread+0x10/0x10 [ 379.959630] ret_from_fork+0x2c/0x50 [ 379.959879] Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Steve French --- fs/cifs/cifs_fs_sb.h | 2 - fs/cifs/cifsglob.h | 3 +- fs/cifs/connect.c | 9 ++-- fs/cifs/dfs.c | 52 ++++++++++++++----- fs/cifs/dfs.h | 16 ++++++ fs/cifs/dfs_cache.c | 140 --------------------------------------------------- fs/cifs/dfs_cache.h | 2 - fs/cifs/misc.c | 7 +++ 8 files changed, 67 insertions(+), 164 deletions(-) diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 013a4bd65280..651759192280 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -61,8 +61,6 @@ struct cifs_sb_info { /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */ char *prepath; - /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */ - uuid_t dfs_mount_id; /* * Indicate whether serverino option was turned off later * (cifs_autodisable_serverino) in order to match new mounts. diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1a8190f71c24..08a73dcb7786 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1233,6 +1233,7 @@ struct cifs_tcon { /* BB add field for back pointer to sb struct(s)? */ #ifdef CONFIG_CIFS_DFS_UPCALL struct list_head ulist; /* cache update list */ + struct list_head dfs_ses_list; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ }; @@ -1749,8 +1750,8 @@ struct cifs_mount_ctx { struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; - uuid_t mount_id; char *origin_fullpath, *leaf_fullpath; + struct list_head dfs_ses_list; }; static inline void free_dfs_info_param(struct dfs_info3_param *param) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b96375f137fa..0eceddde7140 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3408,7 +3408,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) bool isdfs; int rc; - uuid_gen(&mnt_ctx.mount_id); + INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list); + rc = dfs_mount_share(&mnt_ctx, &isdfs); if (rc) goto error; @@ -3428,7 +3429,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) kfree(cifs_sb->prepath); cifs_sb->prepath = ctx->prepath; ctx->prepath = NULL; - uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); out: cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); @@ -3440,7 +3440,7 @@ out: return rc; error: - dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); + dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list); kfree(mnt_ctx.origin_fullpath); kfree(mnt_ctx.leaf_fullpath); cifs_mount_put_conns(&mnt_ctx); @@ -3638,9 +3638,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) spin_unlock(&cifs_sb->tlink_tree_lock); kfree(cifs_sb->prepath); -#ifdef CONFIG_CIFS_DFS_UPCALL - dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id); -#endif call_rcu(&cifs_sb->rcu, delayed_free); } diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index 6505f1b20147..c8bda52fa096 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -99,18 +99,27 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) return rc; } -static void set_root_ses(struct cifs_mount_ctx *mnt_ctx) +static int get_root_smb_session(struct cifs_mount_ctx *mnt_ctx) { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct dfs_root_ses *root_ses; struct cifs_ses *ses = mnt_ctx->ses; if (ses) { + root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL); + if (!root_ses) + return -ENOMEM; + + INIT_LIST_HEAD(&root_ses->list); + spin_lock(&cifs_tcp_ses_lock); ses->ses_count++; spin_unlock(&cifs_tcp_ses_lock); - dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, ses); + root_ses->ses = ses; + list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list); } - ctx->dfs_root_ses = mnt_ctx->ses; + ctx->dfs_root_ses = ses; + return 0; } static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path, @@ -118,7 +127,8 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co { struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; struct dfs_info3_param ref = {}; - int rc; + bool is_refsrv = false; + int rc, rc2; rc = dfs_cache_get_tgt_referral(ref_path + 1, tit, &ref); if (rc) @@ -133,8 +143,7 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co if (rc) goto out; - if (ref.flags & DFSREF_REFERRAL_SERVER) - set_root_ses(mnt_ctx); + is_refsrv = !!(ref.flags & DFSREF_REFERRAL_SERVER); rc = -EREMOTE; if (ref.flags & DFSREF_STORAGE_SERVER) { @@ -143,13 +152,17 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co goto out; /* some servers may not advertise referral capability under ref.flags */ - if (!(ref.flags & DFSREF_REFERRAL_SERVER) && - is_tcon_dfs(mnt_ctx->tcon)) - set_root_ses(mnt_ctx); + is_refsrv |= is_tcon_dfs(mnt_ctx->tcon); rc = cifs_is_path_remote(mnt_ctx); } + if (rc == -EREMOTE && is_refsrv) { + rc2 = get_root_smb_session(mnt_ctx); + if (rc2) + rc = rc2; + } + out: free_dfs_info_param(&ref); return rc; @@ -162,6 +175,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) char *ref_path = NULL, *full_path = NULL; struct dfs_cache_tgt_iterator *tit; struct TCP_Server_Info *server; + struct cifs_tcon *tcon; char *origin_fullpath = NULL; int num_links = 0; int rc; @@ -231,12 +245,22 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) if (!rc) { server = mnt_ctx->server; + tcon = mnt_ctx->tcon; mutex_lock(&server->refpath_lock); - server->origin_fullpath = origin_fullpath; - server->current_fullpath = server->leaf_fullpath; + if (!server->origin_fullpath) { + server->origin_fullpath = origin_fullpath; + server->current_fullpath = server->leaf_fullpath; + origin_fullpath = NULL; + } mutex_unlock(&server->refpath_lock); - origin_fullpath = NULL; + + if (list_empty(&tcon->dfs_ses_list)) { + list_replace_init(&mnt_ctx->dfs_ses_list, + &tcon->dfs_ses_list); + } else { + dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list); + } } out: @@ -277,7 +301,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs) } *isdfs = true; - set_root_ses(mnt_ctx); + rc = get_root_smb_session(mnt_ctx); + if (rc) + return rc; return __dfs_mount_share(mnt_ctx); } diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h index baf16df55d7e..13f26e01f7b9 100644 --- a/fs/cifs/dfs.h +++ b/fs/cifs/dfs.h @@ -10,6 +10,11 @@ #include "fs_context.h" #include "cifs_unicode.h" +struct dfs_root_ses { + struct list_head list; + struct cifs_ses *ses; +}; + int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, struct smb3_fs_context *ctx); int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs); @@ -44,4 +49,15 @@ static inline char *dfs_get_automount_devname(struct dentry *dentry, void *page) true); } +static inline void dfs_put_root_smb_sessions(struct list_head *head) +{ + struct dfs_root_ses *root, *tmp; + + list_for_each_entry_safe(root, tmp, head, list) { + list_del_init(&root->list); + cifs_put_smb_ses(root->ses); + kfree(root); + } +} + #endif /* _CIFS_DFS_H */ diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index ac86bd0ebd63..1c59811bfa73 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -49,17 +49,6 @@ struct cache_entry { struct cache_dfs_tgt *tgthint; }; -/* List of referral server sessions per dfs mount */ -struct mount_group { - struct list_head list; - uuid_t id; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES]; - int num_sessions; - spinlock_t lock; - struct list_head refresh_list; - struct kref refcount; -}; - static struct kmem_cache *cache_slab __read_mostly; static struct workqueue_struct *dfscache_wq __read_mostly; @@ -76,85 +65,10 @@ static atomic_t cache_count; static struct hlist_head cache_htable[CACHE_HTABLE_SIZE]; static DECLARE_RWSEM(htable_rw_lock); -static LIST_HEAD(mount_group_list); -static DEFINE_MUTEX(mount_group_list_lock); - static void refresh_cache_worker(struct work_struct *work); static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker); -static void __mount_group_release(struct mount_group *mg) -{ - int i; - - for (i = 0; i < mg->num_sessions; i++) - cifs_put_smb_ses(mg->sessions[i]); - kfree(mg); -} - -static void mount_group_release(struct kref *kref) -{ - struct mount_group *mg = container_of(kref, struct mount_group, refcount); - - mutex_lock(&mount_group_list_lock); - list_del(&mg->list); - mutex_unlock(&mount_group_list_lock); - __mount_group_release(mg); -} - -static struct mount_group *find_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - list_for_each_entry(mg, &mount_group_list, list) { - if (uuid_equal(&mg->id, id)) - return mg; - } - return ERR_PTR(-ENOENT); -} - -static struct mount_group *__get_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - mg = find_mount_group_locked(id); - if (!IS_ERR(mg)) - return mg; - - mg = kmalloc(sizeof(*mg), GFP_KERNEL); - if (!mg) - return ERR_PTR(-ENOMEM); - kref_init(&mg->refcount); - uuid_copy(&mg->id, id); - mg->num_sessions = 0; - spin_lock_init(&mg->lock); - list_add(&mg->list, &mount_group_list); - return mg; -} - -static struct mount_group *get_mount_group(const uuid_t *id) -{ - struct mount_group *mg; - - mutex_lock(&mount_group_list_lock); - mg = __get_mount_group_locked(id); - if (!IS_ERR(mg)) - kref_get(&mg->refcount); - mutex_unlock(&mount_group_list_lock); - - return mg; -} - -static void free_mount_group_list(void) -{ - struct mount_group *mg, *tmp_mg; - - list_for_each_entry_safe(mg, tmp_mg, &mount_group_list, list) { - list_del_init(&mg->list); - __mount_group_release(mg); - } -} - /** * dfs_cache_canonical_path - get a canonical DFS path * @@ -704,7 +618,6 @@ void dfs_cache_destroy(void) { cancel_delayed_work_sync(&refresh_task); unload_nls(cache_cp); - free_mount_group_list(); flush_cache_ents(); kmem_cache_destroy(cache_slab); destroy_workqueue(dfscache_wq); @@ -1111,54 +1024,6 @@ out_unlock: return rc; } -/** - * dfs_cache_add_refsrv_session - add SMB session of referral server - * - * @mount_id: mount group uuid to lookup. - * @ses: reference counted SMB session of referral server. - */ -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses) -{ - struct mount_group *mg; - - if (WARN_ON_ONCE(!mount_id || uuid_is_null(mount_id) || !ses)) - return; - - mg = get_mount_group(mount_id); - if (WARN_ON_ONCE(IS_ERR(mg))) - return; - - spin_lock(&mg->lock); - if (mg->num_sessions < ARRAY_SIZE(mg->sessions)) - mg->sessions[mg->num_sessions++] = ses; - spin_unlock(&mg->lock); - kref_put(&mg->refcount, mount_group_release); -} - -/** - * dfs_cache_put_refsrv_sessions - put all referral server sessions - * - * Put all SMB sessions from the given mount group id. - * - * @mount_id: mount group uuid to lookup. - */ -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id) -{ - struct mount_group *mg; - - if (!mount_id || uuid_is_null(mount_id)) - return; - - mutex_lock(&mount_group_list_lock); - mg = find_mount_group_locked(mount_id); - if (IS_ERR(mg)) { - mutex_unlock(&mount_group_list_lock); - return; - } - mutex_unlock(&mount_group_list_lock); - kref_put(&mg->refcount, mount_group_release); -} - /* Extract share from DFS target and return a pointer to prefix path or NULL */ static const char *parse_target_share(const char *target, char **share) { @@ -1384,11 +1249,6 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) cifs_dbg(FYI, "%s: not a dfs mount\n", __func__); return 0; } - - if (uuid_is_null(&cifs_sb->dfs_mount_id)) { - cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__); - return -EINVAL; - } /* * After reconnecting to a different server, unique ids won't match anymore, so we disable * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index be3b5a44cf82..e0d39393035a 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -40,8 +40,6 @@ int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iter struct dfs_info3_param *ref); int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share, char **prefix); -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id); -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses); char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap); int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index a0d286ee723d..6f9c78650528 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -22,6 +22,7 @@ #ifdef CONFIG_CIFS_DFS_UPCALL #include "dns_resolve.h" #include "dfs_cache.h" +#include "dfs.h" #endif #include "fs_context.h" #include "cached_dir.h" @@ -134,6 +135,9 @@ tconInfoAlloc(void) spin_lock_init(&ret_buf->stat_lock); atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_LIST_HEAD(&ret_buf->dfs_ses_list); +#endif return ret_buf; } @@ -149,6 +153,9 @@ tconInfoFree(struct cifs_tcon *tcon) atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); kfree_sensitive(tcon->password); +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_put_root_smb_sessions(&tcon->dfs_ses_list); +#endif kfree(tcon); } -- cgit From 47dd902aaee9b9341808a3a994793199e7eddb88 Mon Sep 17 00:00:00 2001 From: Dylan Jhong Date: Fri, 10 Mar 2023 15:50:21 +0800 Subject: RISC-V: mm: Support huge page in vmalloc_fault() Since RISC-V supports ioremap() with huge page (pud/pmd) mapping, However, vmalloc_fault() assumes that the vmalloc range is limited to pte mappings. To complete the vmalloc_fault() function by adding huge page support. Fixes: 310f541a027b ("riscv: Enable HAVE_ARCH_HUGE_VMAP for 64BIT") Cc: stable@vger.kernel.org Signed-off-by: Dylan Jhong Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230310075021.3919290-1-dylan@andestech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 460f785f6e09..d5f3e501dffb 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -143,6 +143,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a no_context(regs, addr); return; } + if (pud_leaf(*pud_k)) + goto flush_tlb; /* * Since the vmalloc area is global, it is unnecessary @@ -153,6 +155,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a no_context(regs, addr); return; } + if (pmd_leaf(*pmd_k)) + goto flush_tlb; /* * Make sure the actual PTE exists as well to @@ -172,6 +176,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. */ +flush_tlb: local_flush_tlb_page(addr); } -- cgit From 6015b1aca1a233379625385feb01dd014aca60b5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 14 Mar 2023 19:32:38 -0700 Subject: sched_getaffinity: don't assume 'cpumask_size()' is fully initialized The getaffinity() system call uses 'cpumask_size()' to decide how big the CPU mask is - so far so good. It is indeed the allocation size of a cpumask. But the code also assumes that the whole allocation is initialized without actually doing so itself. That's wrong, because we might have fixed-size allocations (making copying and clearing more efficient), but not all of it is then necessarily used if 'nr_cpu_ids' is smaller. Having checked other users of 'cpumask_size()', they all seem to be ok, either using it purely for the allocation size, or explicitly zeroing the cpumask before using the size in bytes to copy it. See for example the ublk_ctrl_get_queue_affinity() function that uses the proper 'zalloc_cpumask_var()' to make sure that the whole mask is cleared, whether the storage is on the stack or if it was an external allocation. Fix this by just zeroing the allocation before using it. Do the same for the compat version of sched_getaffinity(), which had the same logic. Also, for consistency, make sched_getaffinity() use 'cpumask_bits()' to access the bits. For a cpumask_var_t, it ends up being a pointer to the same data either way, but it's just a good idea to treat it like you would a 'cpumask_t'. The compat case already did that. Reported-by: Ryan Roberts Link: https://lore.kernel.org/lkml/7d026744-6bd6-6827-0471-b5e8eae0be3f@arm.com/ Cc: Yury Norov Signed-off-by: Linus Torvalds --- kernel/compat.c | 2 +- kernel/sched/core.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/compat.c b/kernel/compat.c index 55551989d9da..fb50f29d9b36 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -152,7 +152,7 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len, if (len & (sizeof(compat_ulong_t)-1)) return -EINVAL; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; ret = sched_getaffinity(pid, mask); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index af017e038b48..488655f2319f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8414,14 +8414,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, if (len & (sizeof(unsigned long)-1)) return -EINVAL; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; ret = sched_getaffinity(pid, mask); if (ret == 0) { unsigned int retlen = min(len, cpumask_size()); - if (copy_to_user(user_mask_ptr, mask, retlen)) + if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen)) ret = -EFAULT; else ret = retlen; -- cgit From f446a630802f154ef0087771683bd4f8e9d08384 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 14 Mar 2023 20:32:55 -0300 Subject: cifs: return DFS root session id in DebugData Return the DFS root session id in /proc/fs/cifs/DebugData to make it easier to track which IPC tcon was used to get new DFS referrals for a specific connection, and aids in debugging. A simple output of it would be Sessions: 1) Address: 192.168.1.13 Uses: 1 Capability: 0x300067 Session Status: 1 Security type: RawNTLMSSP SessionId: 0xd80000000009 User: 0 Cred User: 0 DFS root session id: 0x128006c000035 Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 1911f7016fa1..19a70a69c760 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -420,6 +420,11 @@ skip_rdma: from_kuid(&init_user_ns, ses->linux_uid), from_kuid(&init_user_ns, ses->cred_uid)); + if (ses->dfs_root_ses) { + seq_printf(m, "\n\tDFS root session id: 0x%llx", + ses->dfs_root_ses->Suid); + } + spin_lock(&ses->chan_lock); if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0)) seq_puts(m, "\tPrimary channel: DISCONNECTED "); -- cgit From 6284e46bdd47743a064fe6ac834a7ac05b1fd206 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 14 Mar 2023 20:32:56 -0300 Subject: cifs: use DFS root session instead of tcon ses Use DFS root session whenever possible to get new DFS referrals otherwise we might end up with an IPC tcon (tcon->ses->tcon_ipc) that doesn't respond to them. It should be safe accessing @ses->dfs_root_ses directly in cifs_inval_name_dfs_link_error() as it has same lifetime as of @tcon. Signed-off-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Steve French --- fs/cifs/misc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 6f9c78650528..b44fb51968bf 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1262,6 +1262,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, * removing cached DFS targets that the client would eventually * need during failover. */ + ses = CIFS_DFS_ROOT_SES(ses); if (ses->server->ops->get_dfs_refer && !ses->server->ops->get_dfs_refer(xid, ses, ref_path, &refs, &num_refs, cifs_sb->local_nls, -- cgit From c753ccb2629f536b8c4feae5c223d5873c814d23 Mon Sep 17 00:00:00 2001 From: Tzafrir Cohen Date: Tue, 14 Mar 2023 15:02:48 +0200 Subject: Makefile: Make kernelrelease target work with M= That commit required the use of filechk_kernel.release for the kernelrelease Makefile target. It is currently only being set when KBUILD_EXTMOD is not set. Make sure it is set in that case as well. Fixes: 1cb86b6c3136 ("kbuild: save overridden KERNELRELEASE in include/config/kernel.release") Signed-off-by: Tzafrir Cohen Signed-off-by: Masahiro Yamada --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index d7bd0eb9b346..d0a0ba8e5a2e 100644 --- a/Makefile +++ b/Makefile @@ -1886,6 +1886,8 @@ endif else # KBUILD_EXTMOD +filechk_kernel.release = echo $(KERNELRELEASE) + ### # External module support. # When building external modules the kernel used as basis is considered -- cgit From 2fd6c4553c962ec7ea8a60c0a3632c7e984800f0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Mar 2023 05:07:26 +0900 Subject: kbuild: deb-pkg: make debian source package working again Since commit c5bf2efb058d ("kbuild: deb-pkg: fix binary-arch and clean in debian/rules"), the source package generated by 'make deb-pkg' fails to build. I terribly missed the fact that the intdeb-pkg target may regenerate include/config/kernel.release due to the following in the top Makefile: %pkg: include/config/kernel.release FORCE Restore KERNELRELEASE= option to avoid the kernel.release disagreement between build-arch and binary-arch. Fixes: c5bf2efb058d ("kbuild: deb-pkg: fix binary-arch and clean in debian/rules") Signed-off-by: Masahiro Yamada --- scripts/package/mkdebian | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index f74380036bb5..c6fbfb9f74ba 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -239,6 +239,7 @@ cat < debian/rules #!$(command -v $MAKE) -f srctree ?= . +KERNELRELEASE = ${KERNELRELEASE} build-indep: build-arch: @@ -250,7 +251,9 @@ build: build-arch binary-indep: binary-arch: build-arch - \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} intdeb-pkg + \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \ + KERNELRELEASE=\$(KERNELRELEASE) intdeb-pkg + clean: rm -rf debian/files debian/linux-* \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} clean -- cgit From 7a531c21f83d7c62825d00bee3a76c1ccfb5de9f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Mar 2023 05:07:27 +0900 Subject: kbuild: deb-pkg: do not take KERNELRELEASE from the source version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KERNELRELEASE does not need to match the package version in changelog. Rather, it conventially matches what is called 'ABINAME', which is a part of the binary package names. Both are the same by default, but the former might be overridden by KDEB_PKGVERSION. In this case, the resulting package would not boot because /lib/modules/$(uname -r) does not point the module directory. Partially revert 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability of source package"). Reported-by: Péter Ujfalusi Fixes: 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability of source package") Signed-off-by: Masahiro Yamada Tested-by: Peter Ujfalusi --- scripts/package/deb-build-option | 9 ++++----- scripts/package/mkdebian | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/package/deb-build-option b/scripts/package/deb-build-option index b079b0d121d4..bd53624318f2 100755 --- a/scripts/package/deb-build-option +++ b/scripts/package/deb-build-option @@ -8,9 +8,8 @@ if [ -z "${CROSS_COMPILE}${cross_compiling}" -a "${DEB_HOST_ARCH}" != "${DEB_BUI fi version=$(dpkg-parsechangelog -S Version) -version_upstream="${version%-*}" -debian_revision="${version#${version_upstream}}" -debian_revision="${debian_revision#*-}" +debian_revision="${version##*-}" -echo KERNELRELEASE=${version_upstream} -echo KBUILD_BUILD_VERSION=${debian_revision} +if [ "${version}" != "${debian_revision}" ]; then + echo KBUILD_BUILD_VERSION=${debian_revision} +fi diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index c6fbfb9f74ba..31b050368cd0 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -244,6 +244,7 @@ KERNELRELEASE = ${KERNELRELEASE} build-indep: build-arch: \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \ + KERNELRELEASE=\$(KERNELRELEASE) \ \$(shell \$(srctree)/scripts/package/deb-build-option) \ olddefconfig all -- cgit From f50aa51c4498d7886cfd9dbf439a2332f234a755 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Mar 2023 05:07:28 +0900 Subject: kbuild: deb-pkg: set CROSS_COMPILE only when undefined Commit 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability of source package") set needless CROSS_COMPILE. For example, 'make allnoconfig bindeb-pkg' on a x86_64 system will set CROSS_COMPILE=i686-linux-gnu-, where the biarch compiler 'gcc' should work for building the i386 kernel. $ uname -m x86_64 $ make allnoconfig bindeb-pkg >/dev/null dpkg-architecture: warning: specified GNU system type i686-linux-gnu does not match CC system type x86_64-linux-gnu, try setting a correct CC environment variable dpkg-source --before-build . debian/rules binary scripts/Kconfig.include:39: C compiler 'i686-linux-gnu-gcc' not found make[6]: *** [scripts/kconfig/Makefile:77: olddefconfig] Error 1 make[5]: *** [Makefile:693: olddefconfig] Error 2 make[4]: *** [Makefile:358: __build_one_by_one] Error 2 make[3]: *** [debian/rules:7: build-arch] Error 2 dpkg-buildpackage: error: debian/rules binary subprocess returned exit status 2 make[2]: *** [scripts/Makefile.package:127: bindeb-pkg] Error 2 make[1]: *** [Makefile:1657: bindeb-pkg] Error 2 make: *** [Makefile:358: __build_one_by_one] Error 2 Check whether CROSS_COMPILE is defined, instead of whether it is non-empty. If you invoke debian/rules via Kbuild, CROSS_COMPILE is always defined in the top Makefile. Fixes: 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability of source package") Signed-off-by: Masahiro Yamada --- scripts/package/deb-build-option | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/package/deb-build-option b/scripts/package/deb-build-option index bd53624318f2..7950eff01781 100755 --- a/scripts/package/deb-build-option +++ b/scripts/package/deb-build-option @@ -1,9 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only -# Set up CROSS_COMPILE if we are cross-compiling, but not called from the -# kernel toplevel Makefile -if [ -z "${CROSS_COMPILE}${cross_compiling}" -a "${DEB_HOST_ARCH}" != "${DEB_BUILD_ARCH}" ]; then +# Set up CROSS_COMPILE if not defined yet +if [ "${CROSS_COMPILE+set}" != "set" -a "${DEB_HOST_ARCH}" != "${DEB_BUILD_ARCH}" ]; then echo CROSS_COMPILE=${DEB_HOST_GNU_TYPE}- fi -- cgit From b611daae5efc64e817171a54021d3b334cc1bc41 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Mar 2023 05:07:29 +0900 Subject: kbuild: deb-pkg: split image and debug objects staging out into functions Prepare for the refactoring in the next commit. Signed-off-by: Masahiro Yamada --- scripts/package/builddeb | 222 +++++++++++++++++++++++++---------------------- 1 file changed, 116 insertions(+), 106 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index ff5e7d8e380b..906889b304a4 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -51,6 +51,115 @@ create_package() { dpkg-deb $dpkg_deb_opts ${KDEB_COMPRESS:+-Z$KDEB_COMPRESS} --build "$pdir" .. } +install_linux_image () { + pdir=$1 + pname=$2 + + rm -rf ${pdir} + + # Only some architectures with OF support have this target + if is_enabled CONFIG_OF_EARLY_FLATTREE && [ -d "${srctree}/arch/${SRCARCH}/boot/dts" ]; then + ${MAKE} -f ${srctree}/Makefile INSTALL_DTBS_PATH="${pdir}/usr/lib/linux-image-${KERNELRELEASE}" dtbs_install + fi + + if is_enabled CONFIG_MODULES; then + ${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${pdir}" modules_install + rm -f "${pdir}/lib/modules/${KERNELRELEASE}/build" + rm -f "${pdir}/lib/modules/${KERNELRELEASE}/source" + if [ "${SRCARCH}" = um ] ; then + mkdir -p "${pdir}/usr/lib/uml/modules" + mv "${pdir}/lib/modules/${KERNELRELEASE}" "${pdir}/usr/lib/uml/modules/${KERNELRELEASE}" + fi + fi + + # Install the kernel + if [ "${ARCH}" = um ] ; then + mkdir -p "${pdir}/usr/bin" "${pdir}/usr/share/doc/${pname}" + cp System.map "${pdir}/usr/lib/uml/modules/${KERNELRELEASE}/System.map" + cp ${KCONFIG_CONFIG} "${pdir}/usr/share/doc/${pname}/config" + gzip "${pdir}/usr/share/doc/${pname}/config" + else + mkdir -p "${pdir}/boot" + cp System.map "${pdir}/boot/System.map-${KERNELRELEASE}" + cp ${KCONFIG_CONFIG} "${pdir}/boot/config-${KERNELRELEASE}" + fi + + # Not all arches have the same installed path in debian + # XXX: have each arch Makefile export a variable of the canonical image install + # path instead + case "${SRCARCH}" in + um) + installed_image_path="usr/bin/linux-${KERNELRELEASE}";; + parisc|mips|powerpc) + installed_image_path="boot/vmlinux-${KERNELRELEASE}";; + *) + installed_image_path="boot/vmlinuz-${KERNELRELEASE}";; + esac + cp "$(${MAKE} -s -f ${srctree}/Makefile image_name)" "${pdir}/${installed_image_path}" + + # Install the maintainer scripts + # Note: hook scripts under /etc/kernel are also executed by official Debian + # kernel packages, as well as kernel packages built using make-kpkg. + # make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and + # so do we; recent versions of dracut and initramfs-tools will obey this. + debhookdir=${KDEB_HOOKDIR:-/etc/kernel} + for script in postinst postrm preinst prerm; do + mkdir -p "${pdir}${debhookdir}/${script}.d" + + mkdir -p "${pdir}/DEBIAN" + cat <<-EOF > "${pdir}/DEBIAN/${script}" + + #!/bin/sh + + set -e + + # Pass maintainer script parameters to hook scripts + export DEB_MAINT_PARAMS="\$*" + + # Tell initramfs builder whether it's wanted + export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No) + + test -d ${debhookdir}/${script}.d && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" ${debhookdir}/${script}.d + exit 0 + EOF + chmod 755 "${pdir}/DEBIAN/${script}" + done +} + +install_linux_image_dbg () { + pdir=$1 + image_pdir=$2 + + rm -rf ${pdir} + + for module in $(find ${image_pdir}/lib/modules/ -name *.ko -printf '%P\n'); do + module=lib/modules/${module} + mkdir -p $(dirname ${pdir}/usr/lib/debug/${module}) + # only keep debug symbols in the debug file + ${OBJCOPY} --only-keep-debug ${image_pdir}/${module} ${pdir}/usr/lib/debug/${module} + # strip original module from debug symbols + ${OBJCOPY} --strip-debug ${image_pdir}/${module} + # then add a link to those + ${OBJCOPY} --add-gnu-debuglink=${pdir}/usr/lib/debug/${module} ${image_pdir}/${module} + done + + # re-sign stripped modules + if is_enabled CONFIG_MODULE_SIG_ALL; then + ${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${image_pdir}" modules_sign + fi + + # Build debug package + # Different tools want the image in different locations + # perf + mkdir -p ${pdir}/usr/lib/debug/lib/modules/${KERNELRELEASE}/ + cp vmlinux ${pdir}/usr/lib/debug/lib/modules/${KERNELRELEASE}/ + # systemtap + mkdir -p ${pdir}/usr/lib/debug/boot/ + ln -s ../lib/modules/${KERNELRELEASE}/vmlinux ${pdir}/usr/lib/debug/boot/vmlinux-${KERNELRELEASE} + # kdump-tools + ln -s lib/modules/${KERNELRELEASE}/vmlinux ${pdir}/usr/lib/debug/vmlinux-${KERNELRELEASE} +} + deploy_kernel_headers () { pdir=$1 @@ -105,8 +214,6 @@ deploy_libc_headers () { } version=$KERNELRELEASE -tmpdir=debian/linux-image -dbg_dir=debian/linux-image-dbg packagename=linux-image-$version dbg_packagename=$packagename-dbg @@ -114,97 +221,7 @@ if [ "$ARCH" = "um" ] ; then packagename=user-mode-linux-$version fi -# Not all arches have the same installed path in debian -# XXX: have each arch Makefile export a variable of the canonical image install -# path instead -case $ARCH in -um) - installed_image_path="usr/bin/linux-$version" - ;; -parisc|mips|powerpc) - installed_image_path="boot/vmlinux-$version" - ;; -*) - installed_image_path="boot/vmlinuz-$version" -esac - -BUILD_DEBUG=$(if_enabled_echo CONFIG_DEBUG_INFO Yes) - -# Setup the directory structure -rm -rf "$tmpdir" "$dbg_dir" debian/files -mkdir -m 755 -p "$tmpdir/DEBIAN" -mkdir -p "$tmpdir/lib" "$tmpdir/boot" - -# Install the kernel -if [ "$ARCH" = "um" ] ; then - mkdir -p "$tmpdir/usr/lib/uml/modules/$version" "$tmpdir/usr/bin" "$tmpdir/usr/share/doc/$packagename" - cp System.map "$tmpdir/usr/lib/uml/modules/$version/System.map" - cp $KCONFIG_CONFIG "$tmpdir/usr/share/doc/$packagename/config" - gzip "$tmpdir/usr/share/doc/$packagename/config" -else - cp System.map "$tmpdir/boot/System.map-$version" - cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version" -fi -cp "$($MAKE -s -f $srctree/Makefile image_name)" "$tmpdir/$installed_image_path" - -if is_enabled CONFIG_OF_EARLY_FLATTREE; then - # Only some architectures with OF support have this target - if [ -d "${srctree}/arch/$SRCARCH/boot/dts" ]; then - $MAKE -f $srctree/Makefile INSTALL_DTBS_PATH="$tmpdir/usr/lib/$packagename" dtbs_install - fi -fi - -if is_enabled CONFIG_MODULES; then - INSTALL_MOD_PATH="$tmpdir" $MAKE -f $srctree/Makefile modules_install - rm -f "$tmpdir/lib/modules/$version/build" - rm -f "$tmpdir/lib/modules/$version/source" - if [ "$ARCH" = "um" ] ; then - mv "$tmpdir/lib/modules/$version"/* "$tmpdir/usr/lib/uml/modules/$version/" - rmdir "$tmpdir/lib/modules/$version" - fi - if [ -n "$BUILD_DEBUG" ] ; then - for module in $(find $tmpdir/lib/modules/ -name *.ko -printf '%P\n'); do - module=lib/modules/$module - mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module) - # only keep debug symbols in the debug file - $OBJCOPY --only-keep-debug $tmpdir/$module $dbg_dir/usr/lib/debug/$module - # strip original module from debug symbols - $OBJCOPY --strip-debug $tmpdir/$module - # then add a link to those - $OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $tmpdir/$module - done - - # resign stripped modules - if is_enabled CONFIG_MODULE_SIG_ALL; then - INSTALL_MOD_PATH="$tmpdir" $MAKE -f $srctree/Makefile modules_sign - fi - fi -fi - -# Install the maintainer scripts -# Note: hook scripts under /etc/kernel are also executed by official Debian -# kernel packages, as well as kernel packages built using make-kpkg. -# make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and -# so do we; recent versions of dracut and initramfs-tools will obey this. -debhookdir=${KDEB_HOOKDIR:-/etc/kernel} -for script in postinst postrm preinst prerm ; do - mkdir -p "$tmpdir$debhookdir/$script.d" - cat < "$tmpdir/DEBIAN/$script" -#!/bin/sh - -set -e - -# Pass maintainer script parameters to hook scripts -export DEB_MAINT_PARAMS="\$*" - -# Tell initramfs builder whether it's wanted -export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No) - -test -d $debhookdir/$script.d && run-parts --arg="$version" --arg="/$installed_image_path" $debhookdir/$script.d -exit 0 -EOF - chmod 755 "$tmpdir/DEBIAN/$script" -done +rm -f debian/files if [ "$ARCH" != "um" ]; then if is_enabled CONFIG_MODULES; then @@ -216,20 +233,13 @@ if [ "$ARCH" != "um" ]; then create_package linux-libc-dev debian/linux-libc-dev fi -create_package "$packagename" "$tmpdir" +install_linux_image debian/linux-image "$packagename" -if [ -n "$BUILD_DEBUG" ] ; then - # Build debug package - # Different tools want the image in different locations - # perf - mkdir -p $dbg_dir/usr/lib/debug/lib/modules/$version/ - cp vmlinux $dbg_dir/usr/lib/debug/lib/modules/$version/ - # systemtap - mkdir -p $dbg_dir/usr/lib/debug/boot/ - ln -s ../lib/modules/$version/vmlinux $dbg_dir/usr/lib/debug/boot/vmlinux-$version - # kdump-tools - ln -s lib/modules/$version/vmlinux $dbg_dir/usr/lib/debug/vmlinux-$version - create_package "$dbg_packagename" "$dbg_dir" +if is_enabled CONFIG_DEBUG_INFO; then + install_linux_image_dbg debian/linux-image-dbg debian/linux-image + create_package "$dbg_packagename" debian/linux-image-dbg fi +create_package "$packagename" debian/linux-image + exit 0 -- cgit From 36862e14e31611f9786622db366327209a7aede7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Mar 2023 05:07:30 +0900 Subject: kbuild: deb-pkg: use dh_listpackages to know enabled packages Use dh_listpackages to get a list of all binary packages. With this, debian/control lists which binary packages will be produced. Previously, ARCH=um listed linux-libc-dev in debian/control, but it was not generated because each of mkdebian and builddeb independently maintained the if-conditionals. Another motivation is to allow scripts/package/builddeb to get the package name (linux-image-*, etc.) dynamically from debian/control. This will also allow the BuildProfile to control the generation of the binary packages. Signed-off-by: Masahiro Yamada --- scripts/package/builddeb | 61 ++++++++++++++++++++++++++---------------------- scripts/package/mkdebian | 7 +++++- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 906889b304a4..c5ae57167d7c 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -160,7 +160,7 @@ install_linux_image_dbg () { ln -s lib/modules/${KERNELRELEASE}/vmlinux ${pdir}/usr/lib/debug/vmlinux-${KERNELRELEASE} } -deploy_kernel_headers () { +install_kernel_headers () { pdir=$1 rm -rf $pdir @@ -198,7 +198,7 @@ deploy_kernel_headers () { ln -s /usr/src/linux-headers-$version $pdir/lib/modules/$version/build } -deploy_libc_headers () { +install_libc_headers () { pdir=$1 rm -rf $pdir @@ -213,33 +213,38 @@ deploy_libc_headers () { mv $pdir/usr/include/asm $pdir/usr/include/$host_arch/ } -version=$KERNELRELEASE -packagename=linux-image-$version -dbg_packagename=$packagename-dbg - -if [ "$ARCH" = "um" ] ; then - packagename=user-mode-linux-$version -fi - rm -f debian/files -if [ "$ARCH" != "um" ]; then - if is_enabled CONFIG_MODULES; then - deploy_kernel_headers debian/linux-headers - create_package linux-headers-$version debian/linux-headers - fi - - deploy_libc_headers debian/linux-libc-dev - create_package linux-libc-dev debian/linux-libc-dev -fi - -install_linux_image debian/linux-image "$packagename" - -if is_enabled CONFIG_DEBUG_INFO; then - install_linux_image_dbg debian/linux-image-dbg debian/linux-image - create_package "$dbg_packagename" debian/linux-image-dbg -fi - -create_package "$packagename" debian/linux-image +packages_enabled=$(dh_listpackages) + +for package in ${packages_enabled} +do + case ${package} in + *-dbg) + # This must be done after linux-image, that is, we expect the + # debug package appears after linux-image in debian/control. + install_linux_image_dbg debian/linux-image-dbg debian/linux-image;; + linux-image-*|user-mode-linux-*) + install_linux_image debian/linux-image ${package};; + linux-libc-dev) + install_libc_headers debian/linux-libc-dev;; + linux-headers-*) + install_kernel_headers debian/linux-headers;; + esac +done + +for package in ${packages_enabled} +do + case ${package} in + *-dbg) + create_package ${package} debian/linux-image-dbg;; + linux-image-*|user-mode-linux-*) + create_package ${package} debian/linux-image;; + linux-libc-dev) + create_package ${package} debian/linux-libc-dev;; + linux-headers-*) + create_package ${package} debian/linux-headers;; + esac +done exit 0 diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 31b050368cd0..e80a661a79ee 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -192,7 +192,7 @@ Section: kernel Priority: optional Maintainer: $maintainer Rules-Requires-Root: no -Build-Depends: bc, rsync, kmod, cpio, bison, flex $extra_build_depends +Build-Depends: bc, debhelper, rsync, kmod, cpio, bison, flex $extra_build_depends Homepage: https://www.kernel.org/ Package: $packagename-$version @@ -200,6 +200,10 @@ Architecture: $debarch Description: Linux kernel, version $version This package contains the Linux kernel, modules and corresponding other files, version: $version. +EOF + +if [ "${SRCARCH}" != um ]; then +cat <> debian/control Package: linux-libc-dev Section: devel @@ -222,6 +226,7 @@ Description: Linux kernel headers for $version on $debarch This is useful for people who need to build external modules EOF fi +fi if is_enabled CONFIG_DEBUG_INFO; then cat <> debian/control -- cgit From 248401cb2c4612d83eb0c352ee8103b78b8eb365 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Fri, 10 Mar 2023 11:48:33 -0800 Subject: ice: avoid bonding causing auxiliary plug/unplug under RTNL lock RDMA is not supported in ice on a PF that has been added to a bonded interface. To enforce this, when an interface enters a bond, we unplug the auxiliary device that supports RDMA functionality. This unplug currently happens in the context of handling the netdev bonding event. This event is sent to the ice driver under RTNL context. This is causing a deadlock where the RDMA driver is waiting for the RTNL lock to complete the removal. Defer the unplugging/re-plugging of the auxiliary device to the service task so that it is not performed under the RTNL lock context. Cc: stable@vger.kernel.org # 6.1.x Reported-by: Jaroslav Pulchart Link: https://lore.kernel.org/netdev/CAK8fFZ6A_Gphw_3-QMGKEFQk=sfCw1Qmq0TVZK3rtAi7vb621A@mail.gmail.com/ Fixes: 5cb1ebdbc434 ("ice: Fix race condition during interface enslave") Fixes: 4eace75e0853 ("RDMA/irdma: Report the correct link speed") Signed-off-by: Dave Ertman Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230310194833.3074601-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice.h | 14 +++++--------- drivers/net/ethernet/intel/ice/ice_main.c | 19 ++++++++----------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index b0e29e342401..e809249500e1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -509,6 +509,7 @@ enum ice_pf_flags { ICE_FLAG_VF_VLAN_PRUNING, ICE_FLAG_LINK_LENIENT_MODE_ENA, ICE_FLAG_PLUG_AUX_DEV, + ICE_FLAG_UNPLUG_AUX_DEV, ICE_FLAG_MTU_CHANGED, ICE_FLAG_GNSS, /* GNSS successfully initialized */ ICE_PF_FLAGS_NBITS /* must be last */ @@ -955,16 +956,11 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf) */ static inline void ice_clear_rdma_cap(struct ice_pf *pf) { - /* We can directly unplug aux device here only if the flag bit - * ICE_FLAG_PLUG_AUX_DEV is not set because ice_unplug_aux_dev() - * could race with ice_plug_aux_dev() called from - * ice_service_task(). In this case we only clear that bit now and - * aux device will be unplugged later once ice_plug_aux_device() - * called from ice_service_task() finishes (see ice_service_task()). + /* defer unplug to service task to avoid RTNL lock and + * clear PLUG bit so that pending plugs don't interfere */ - if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) - ice_unplug_aux_dev(pf); - + clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags); + set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags); clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); } #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 567694bf098b..c233464b8f6b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2316,18 +2316,15 @@ static void ice_service_task(struct work_struct *work) } } - if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) { - /* Plug aux device per request */ - ice_plug_aux_dev(pf); + /* unplug aux dev per request, if an unplug request came in + * while processing a plug request, this will handle it + */ + if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags)) + ice_unplug_aux_dev(pf); - /* Mark plugging as done but check whether unplug was - * requested during ice_plug_aux_dev() call - * (e.g. from ice_clear_rdma_cap()) and if so then - * plug aux device. - */ - if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) - ice_unplug_aux_dev(pf); - } + /* Plug aux device per request */ + if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) + ice_plug_aux_dev(pf); if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) { struct iidc_event *event; -- cgit From 4b397c06cb987935b1b097336532aa6b4210e091 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Mar 2023 19:11:09 +0000 Subject: net: tunnels: annotate lockless accesses to dev->needed_headroom IP tunnels can apparently update dev->needed_headroom in their xmit path. This patch takes care of three tunnels xmit, and also the core LL_RESERVED_SPACE() and LL_RESERVED_SPACE_EXTRA() helpers. More changes might be needed for completeness. BUG: KCSAN: data-race in ip_tunnel_xmit / ip_tunnel_xmit read to 0xffff88815b9da0ec of 2 bytes by task 888 on cpu 1: ip_tunnel_xmit+0x1270/0x1730 net/ipv4/ip_tunnel.c:803 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 write to 0xffff88815b9da0ec of 2 bytes by task 2379 on cpu 0: ip_tunnel_xmit+0x1294/0x1730 net/ipv4/ip_tunnel.c:804 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip6_finish_output2+0x9bc/0xc50 net/ipv6/ip6_output.c:134 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x39a/0x4e0 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0xeb/0x220 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:444 [inline] NF_HOOK include/linux/netfilter.h:302 [inline] mld_sendpack+0x438/0x6a0 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x519/0x7b0 net/ipv6/mcast.c:2653 process_one_work+0x3e6/0x750 kernel/workqueue.c:2390 worker_thread+0x5f2/0xa10 kernel/workqueue.c:2537 kthread+0x1ac/0x1e0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 value changed: 0x0dd4 -> 0x0e14 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 2379 Comm: kworker/0:0 Not tainted 6.3.0-rc1-syzkaller-00002-g8ca09d5fa354-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 Workqueue: mld mld_ifc_work Fixes: 8eb30be0352d ("ipv6: Create ip6_tnl_xmit") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230310191109.2384387-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 ++++-- net/ipv4/ip_tunnel.c | 12 ++++++------ net/ipv6/ip6_tunnel.c | 4 ++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a14b7b11766..470085b121d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -297,9 +297,11 @@ struct hh_cache { * relationship HH alignment <= LL alignment. */ #define LL_RESERVED_SPACE(dev) \ - ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \ + & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ - ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \ + & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index de90b09dfe78..2541083d49ad 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -614,10 +614,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; - if (headroom > dev->needed_headroom) - dev->needed_headroom = headroom; + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); goto tx_dropped; } @@ -800,10 +800,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); - if (skb_cow_head(skb, dev->needed_headroom)) { + if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) { ip_rt_put(rt); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 47b6607a1370..5e80e517f071 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1240,8 +1240,8 @@ route_lookup: */ max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; + if (max_headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, max_headroom); err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) -- cgit From c22c3bbf351e4ce905f082649cffa1ff893ea8c1 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 11 Mar 2023 19:34:45 +0100 Subject: net: phy: smsc: bail out in lan87xx_read_status if genphy_read_status fails If genphy_read_status fails then further access to the PHY may result in unpredictable behavior. To prevent this bail out immediately if genphy_read_status fails. Fixes: 4223dbffed9f ("net: phy: smsc: Re-enable EDPD mode for LAN87xx") Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/026aa4f2-36f5-1c10-ab9f-cdb17dda6ac4@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 00d9eff91dcf..df2c5435c5c4 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -199,8 +199,11 @@ static int lan95xx_config_aneg_ext(struct phy_device *phydev) static int lan87xx_read_status(struct phy_device *phydev) { struct smsc_phy_priv *priv = phydev->priv; + int err; - int err = genphy_read_status(phydev); + err = genphy_read_status(phydev); + if (err) + return err; if (!phydev->link && priv->energy_enable && phydev->irq == PHY_POLL) { /* Disable EDPD to wake up PHY */ -- cgit From d9ba9934285514f1f95d96326a82398a22dc77f2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 11 Mar 2023 19:19:03 -0800 Subject: tcp: Fix bind() conflict check for dual-stack wildcard address. Paul Holzinger reported [0] that commit 5456262d2baa ("net: Fix incorrect address comparison when searching for a bind2 bucket") introduced a bind() regression. Paul also gave a nice repro that calls two types of bind() on the same port, both of which now succeed, but the second call should fail: bind(fd1, ::, port) + bind(fd2, 127.0.0.1, port) The cited commit added address family tests in three functions to fix the uninit-value KMSAN report. [1] However, the test added to inet_bind2_bucket_match_addr_any() removed a necessary conflict check; the dual-stack wildcard address no longer conflicts with an IPv4 non-wildcard address. If tb->family is AF_INET6 and sk->sk_family is AF_INET in inet_bind2_bucket_match_addr_any(), we still need to check if tb has the dual-stack wildcard address. Note that the IPv4 wildcard address does not conflict with IPv6 non-wildcard addresses. [0]: https://lore.kernel.org/netdev/e21bf153-80b0-9ec0-15ba-e04a4ad42c34@redhat.com/ [1]: https://lore.kernel.org/netdev/CAG_fn=Ud3zSW7AZWXc+asfMhZVL5ETnvuY44Pmyv4NPv-ijN-A@mail.gmail.com/ Fixes: 5456262d2baa ("net: Fix incorrect address comparison when searching for a bind2 bucket") Signed-off-by: Kuniyuki Iwashima Reported-by: Paul Holzinger Link: https://lore.kernel.org/netdev/CAG_fn=Ud3zSW7AZWXc+asfMhZVL5ETnvuY44Pmyv4NPv-ijN-A@mail.gmail.com/ Reviewed-by: Eric Dumazet Tested-by: Paul Holzinger Reviewed-by: Martin KaFai Lau Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e41fdc38ce19..6edae3886885 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -828,8 +828,14 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const #if IS_ENABLED(CONFIG_IPV6) struct in6_addr addr_any = {}; - if (sk->sk_family != tb->family) + if (sk->sk_family != tb->family) { + if (sk->sk_family == AF_INET) + return net_eq(ib2_net(tb), net) && tb->port == port && + tb->l3mdev == l3mdev && + ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any); + return false; + } if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && -- cgit From 13715acf8ab5b32a6d7e42686fceeb66df114185 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 11 Mar 2023 19:19:04 -0800 Subject: selftest: Add test for bind() conflicts. The test checks if (IPv4, IPv6) address pair properly conflict or not. * IPv4 * 0.0.0.0 * 127.0.0.1 * IPv6 * :: * ::1 If the IPv6 address is [::], the second bind() always fails. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/bind_wildcard.c | 114 ++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 tools/testing/selftests/net/bind_wildcard.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index a6911cae368c..80f06aa62034 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash bind_timewait +bind_wildcard csum cmsg_sender diag_uid diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 6cd8993454d7..80fbfe0330f6 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -80,6 +80,7 @@ TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o TEST_GEN_FILES += ip_local_port_range +TEST_GEN_FILES += bind_wildcard TEST_FILES := settings diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c new file mode 100644 index 000000000000..58edfc15d28b --- /dev/null +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include +#include + +#include "../kselftest_harness.h" + +FIXTURE(bind_wildcard) +{ + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + int expected_errno; +}; + +FIXTURE_VARIANT(bind_wildcard) +{ + const __u32 addr4_const; + const struct in6_addr *addr6_const; +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_any, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_loopback, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_any, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_loopback, +}; + +FIXTURE_SETUP(bind_wildcard) +{ + self->addr4.sin_family = AF_INET; + self->addr4.sin_port = htons(0); + self->addr4.sin_addr.s_addr = htonl(variant->addr4_const); + + self->addr6.sin6_family = AF_INET6; + self->addr6.sin6_port = htons(0); + self->addr6.sin6_addr = *variant->addr6_const; + + if (variant->addr6_const == &in6addr_any) + self->expected_errno = EADDRINUSE; + else + self->expected_errno = 0; +} + +FIXTURE_TEARDOWN(bind_wildcard) +{ +} + +void bind_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_wildcard) *self, + struct sockaddr *addr1, socklen_t addrlen1, + struct sockaddr *addr2, socklen_t addrlen2) +{ + int fd[2]; + int ret; + + fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0); + ASSERT_GT(fd[0], 0); + + ret = bind(fd[0], addr1, addrlen1); + ASSERT_EQ(ret, 0); + + ret = getsockname(fd[0], addr1, &addrlen1); + ASSERT_EQ(ret, 0); + + ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port; + + fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0); + ASSERT_GT(fd[1], 0); + + ret = bind(fd[1], addr2, addrlen2); + if (self->expected_errno) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, self->expected_errno); + } else { + ASSERT_EQ(ret, 0); + } + + close(fd[1]); + close(fd[0]); +} + +TEST_F(bind_wildcard, v4_v6) +{ + bind_sockets(_metadata, self, + (struct sockaddr *)&self->addr4, sizeof(self->addr6), + (struct sockaddr *)&self->addr6, sizeof(self->addr6)); +} + +TEST_F(bind_wildcard, v6_v4) +{ + bind_sockets(_metadata, self, + (struct sockaddr *)&self->addr6, sizeof(self->addr6), + (struct sockaddr *)&self->addr4, sizeof(self->addr4)); +} + +TEST_HARNESS_MAIN -- cgit From 5000fe6c27827a61d8250a7e4a1d26c3298ef4f6 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Mon, 13 Mar 2023 00:08:37 +0800 Subject: nfc: st-nci: Fix use after free bug in ndlc_remove due to race condition This bug influences both st_nci_i2c_remove and st_nci_spi_remove. Take st_nci_i2c_remove as an example. In st_nci_i2c_probe, it called ndlc_probe and bound &ndlc->sm_work with llt_ndlc_sm_work. When it calls ndlc_recv or timeout handler, it will finally call schedule_work to start the work. When we call st_nci_i2c_remove to remove the driver, there may be a sequence as follows: Fix it by finishing the work before cleanup in ndlc_remove CPU0 CPU1 |llt_ndlc_sm_work st_nci_i2c_remove | ndlc_remove | st_nci_remove | nci_free_device| kfree(ndev) | //free ndlc->ndev | |llt_ndlc_rcv_queue |nci_recv_frame |//use ndlc->ndev Fixes: 35630df68d60 ("NFC: st21nfcb: Add driver for STMicroelectronics ST21NFCB NFC chip") Signed-off-by: Zheng Wang Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230312160837.2040857-1-zyytlz.wz@163.com Signed-off-by: Jakub Kicinski --- drivers/nfc/st-nci/ndlc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index 755460a73c0d..d2aa9f766738 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -282,13 +282,15 @@ EXPORT_SYMBOL(ndlc_probe); void ndlc_remove(struct llt_ndlc *ndlc) { - st_nci_remove(ndlc->ndev); - /* cancel timers */ del_timer_sync(&ndlc->t1_timer); del_timer_sync(&ndlc->t2_timer); ndlc->t2_active = false; ndlc->t1_active = false; + /* cancel work */ + cancel_work_sync(&ndlc->sm_work); + + st_nci_remove(ndlc->ndev); skb_queue_purge(&ndlc->rcv_q); skb_queue_purge(&ndlc->send_q); -- cgit From 5ce76fe1eead179c058d9151ee1f4088cfdc1c6b Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Mar 2023 00:08:40 +0100 Subject: veth: rely on rtnl_dereference() instead of on rcu_dereference() in veth_set_xdp_features() Fix the following kernel warning in veth_set_xdp_features routine relying on rtnl_dereference() instead of on rcu_dereference(): ============================= WARNING: suspicious RCU usage 6.3.0-rc1-00144-g064d70527aaa #149 Not tainted ----------------------------- drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ip/135: (net/core/rtnetlink.c:6172) stack backtrace: CPU: 1 PID: 135 Comm: ip Not tainted 6.3.0-rc1-00144-g064d70527aaa #149 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107) lockdep_rcu_suspicious (include/linux/context_tracking.h:152) veth_set_xdp_features (drivers/net/veth.c:1265 (discriminator 9)) veth_newlink (drivers/net/veth.c:1892) ? veth_set_features (drivers/net/veth.c:1774) ? kasan_save_stack (mm/kasan/common.c:47) ? kasan_save_stack (mm/kasan/common.c:46) ? kasan_set_track (mm/kasan/common.c:52) ? alloc_netdev_mqs (include/linux/slab.h:737) ? rcu_read_lock_sched_held (kernel/rcu/update.c:125) ? trace_kmalloc (include/trace/events/kmem.h:54) ? __xdp_rxq_info_reg (net/core/xdp.c:188) ? alloc_netdev_mqs (net/core/dev.c:10657) ? rtnl_create_link (net/core/rtnetlink.c:3312) rtnl_newlink_create (net/core/rtnetlink.c:3440) ? rtnl_link_get_net_capable.constprop.0 (net/core/rtnetlink.c:3391) __rtnl_newlink (net/core/rtnetlink.c:3657) ? lock_downgrade (kernel/locking/lockdep.c:5321) ? rtnl_link_unregister (net/core/rtnetlink.c:3487) rtnl_newlink (net/core/rtnetlink.c:3671) rtnetlink_rcv_msg (net/core/rtnetlink.c:6174) ? rtnl_link_fill (net/core/rtnetlink.c:6070) ? mark_usage (kernel/locking/lockdep.c:4914) ? mark_usage (kernel/locking/lockdep.c:4914) netlink_rcv_skb (net/netlink/af_netlink.c:2574) ? rtnl_link_fill (net/core/rtnetlink.c:6070) ? netlink_ack (net/netlink/af_netlink.c:2551) ? lock_acquire (kernel/locking/lockdep.c:467) ? net_generic (include/linux/rcupdate.h:805) ? netlink_deliver_tap (include/linux/rcupdate.h:805) netlink_unicast (net/netlink/af_netlink.c:1340) ? netlink_attachskb (net/netlink/af_netlink.c:1350) netlink_sendmsg (net/netlink/af_netlink.c:1942) ? netlink_unicast (net/netlink/af_netlink.c:1861) ? netlink_unicast (net/netlink/af_netlink.c:1861) sock_sendmsg (net/socket.c:727) ____sys_sendmsg (net/socket.c:2501) ? kernel_sendmsg (net/socket.c:2448) ? __copy_msghdr (net/socket.c:2428) ___sys_sendmsg (net/socket.c:2557) ? mark_usage (kernel/locking/lockdep.c:4914) ? do_recvmmsg (net/socket.c:2544) ? lock_acquire (kernel/locking/lockdep.c:467) ? find_held_lock (kernel/locking/lockdep.c:5159) ? __lock_release (kernel/locking/lockdep.c:5345) ? __might_fault (mm/memory.c:5625) ? lock_downgrade (kernel/locking/lockdep.c:5321) ? __fget_light (include/linux/atomic/atomic-arch-fallback.h:227) __sys_sendmsg (include/linux/file.h:31) ? __sys_sendmsg_sock (net/socket.c:2572) ? rseq_get_rseq_cs (kernel/rseq.c:275) ? lockdep_hardirqs_on_prepare.part.0 (kernel/locking/lockdep.c:4263) do_syscall_64 (arch/x86/entry/common.c:50) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) RIP: 0033:0x7f0d1aadeb17 Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 Fixes: fccca038f300 ("veth: take into account device reconfiguration for xdp_features flag") Suggested-by: Eric Dumazet Reported-by: Matthieu Baerts Link: https://lore.kernel.org/netdev/cover.1678364612.git.lorenzo@kernel.org/T/#me4c9d8e985ec7ebee981cfdb5bc5ec651ef4035d Signed-off-by: Lorenzo Bianconi Reported-by: syzbot+c3d0d9c42d59ff644ea6@syzkaller.appspotmail.com Reviewed-by: Eric Dumazet Tested-by: Matthieu Baerts Link: https://lore.kernel.org/r/dfd6a9a7d85e9113063165e1f47b466b90ad7b8a.1678748579.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 293dc3b2c84a..4da74ac27f9a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1262,7 +1262,7 @@ static void veth_set_xdp_features(struct net_device *dev) struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; - peer = rcu_dereference(priv->peer); + peer = rtnl_dereference(priv->peer); if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) { xdp_features_t val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | -- cgit From 35c356924fe3669dfbb1185607ce3b37f70bfa80 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 13 Mar 2023 18:21:24 +0100 Subject: mlxsw: spectrum: Fix incorrect parsing depth after reload Spectrum ASICs have a configurable limit on how deep into the packet they parse. By default, the limit is 96 bytes. There are several cases where this parsing depth is not enough and there is a need to increase it. For example, timestamping of PTP packets and a FIB multipath hash policy that requires hashing on inner fields. The driver therefore maintains a reference count that reflects the number of consumers that require an increased parsing depth. During reload_down() the parsing depth reference count does not necessarily drop to zero, but the parsing depth itself is restored to the default during reload_up() when the firmware is reset. It is therefore possible to end up in situations where the driver thinks that the parsing depth was increased (reference count is non-zero), when it is not. Fix by making sure that all the consumers that increase the parsing depth reference count also decrease it during reload_down(). Specifically, make sure that when the routing code is de-initialized it drops the reference count if it was increased because of a FIB multipath hash policy that requires hashing on inner fields. Add a warning if the reference count is not zero after the driver was de-initialized and explicitly reset it to zero during initialization for good measures. Fixes: 2d91f0803b84 ("mlxsw: spectrum: Add infrastructure for parsing configuration") Reported-by: Maksym Yaremchuk Signed-off-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/9c35e1b3e6c1d8f319a2449d14e2b86373f3b3ba.1678727526.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a8f94b7544ee..02a327744a61 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2937,6 +2937,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, static void mlxsw_sp_parsing_init(struct mlxsw_sp *mlxsw_sp) { + refcount_set(&mlxsw_sp->parsing.parsing_depth_ref, 0); mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH; mlxsw_sp->parsing.vxlan_udp_dport = MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT; mutex_init(&mlxsw_sp->parsing.lock); @@ -2945,6 +2946,7 @@ static void mlxsw_sp_parsing_init(struct mlxsw_sp *mlxsw_sp) static void mlxsw_sp_parsing_fini(struct mlxsw_sp *mlxsw_sp) { mutex_destroy(&mlxsw_sp->parsing.lock); + WARN_ON_ONCE(refcount_read(&mlxsw_sp->parsing.parsing_depth_ref)); } struct mlxsw_sp_ipv6_addr_node { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 09e32778b012..4a73e2fe95ef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10381,11 +10381,23 @@ err_reg_write: old_inc_parsing_depth); return err; } + +static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp) +{ + bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth; + + mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth, + false); +} #else static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) { return 0; } + +static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp) +{ +} #endif static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) @@ -10615,6 +10627,7 @@ err_register_inet6addr_notifier: err_register_inetaddr_notifier: mlxsw_core_flush_owq(); err_dscp_init: + mlxsw_sp_mp_hash_fini(mlxsw_sp); err_mp_hash_init: mlxsw_sp_neigh_fini(mlxsw_sp); err_neigh_init: @@ -10655,6 +10668,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); mlxsw_core_flush_owq(); + mlxsw_sp_mp_hash_fini(mlxsw_sp); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_lb_rif_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); -- cgit From 13085e1b5cab8ad802904d72e6a6dae85ae0cd20 Mon Sep 17 00:00:00 2001 From: Wenjia Zhang Date: Mon, 13 Mar 2023 11:08:28 +0100 Subject: net/smc: fix deadlock triggered by cancel_delayed_work_syn() The following LOCKDEP was detected: Workqueue: events smc_lgr_free_work [smc] WARNING: possible circular locking dependency detected 6.1.0-20221027.rc2.git8.56bc5b569087.300.fc36.s390x+debug #1 Not tainted ------------------------------------------------------ kworker/3:0/176251 is trying to acquire lock: 00000000f1467148 ((wq_completion)smc_tx_wq-00000000#2){+.+.}-{0:0}, at: __flush_workqueue+0x7a/0x4f0 but task is already holding lock: 0000037fffe97dc8 ((work_completion)(&(&lgr->free_work)->work)){+.+.}-{0:0}, at: process_one_work+0x232/0x730 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 ((work_completion)(&(&lgr->free_work)->work)){+.+.}-{0:0}: __lock_acquire+0x58e/0xbd8 lock_acquire.part.0+0xe2/0x248 lock_acquire+0xac/0x1c8 __flush_work+0x76/0xf0 __cancel_work_timer+0x170/0x220 __smc_lgr_terminate.part.0+0x34/0x1c0 [smc] smc_connect_rdma+0x15e/0x418 [smc] __smc_connect+0x234/0x480 [smc] smc_connect+0x1d6/0x230 [smc] __sys_connect+0x90/0xc0 __do_sys_socketcall+0x186/0x370 __do_syscall+0x1da/0x208 system_call+0x82/0xb0 -> #3 (smc_client_lgr_pending){+.+.}-{3:3}: __lock_acquire+0x58e/0xbd8 lock_acquire.part.0+0xe2/0x248 lock_acquire+0xac/0x1c8 __mutex_lock+0x96/0x8e8 mutex_lock_nested+0x32/0x40 smc_connect_rdma+0xa4/0x418 [smc] __smc_connect+0x234/0x480 [smc] smc_connect+0x1d6/0x230 [smc] __sys_connect+0x90/0xc0 __do_sys_socketcall+0x186/0x370 __do_syscall+0x1da/0x208 system_call+0x82/0xb0 -> #2 (sk_lock-AF_SMC){+.+.}-{0:0}: __lock_acquire+0x58e/0xbd8 lock_acquire.part.0+0xe2/0x248 lock_acquire+0xac/0x1c8 lock_sock_nested+0x46/0xa8 smc_tx_work+0x34/0x50 [smc] process_one_work+0x30c/0x730 worker_thread+0x62/0x420 kthread+0x138/0x150 __ret_from_fork+0x3c/0x58 ret_from_fork+0xa/0x40 -> #1 ((work_completion)(&(&smc->conn.tx_work)->work)){+.+.}-{0:0}: __lock_acquire+0x58e/0xbd8 lock_acquire.part.0+0xe2/0x248 lock_acquire+0xac/0x1c8 process_one_work+0x2bc/0x730 worker_thread+0x62/0x420 kthread+0x138/0x150 __ret_from_fork+0x3c/0x58 ret_from_fork+0xa/0x40 -> #0 ((wq_completion)smc_tx_wq-00000000#2){+.+.}-{0:0}: check_prev_add+0xd8/0xe88 validate_chain+0x70c/0xb20 __lock_acquire+0x58e/0xbd8 lock_acquire.part.0+0xe2/0x248 lock_acquire+0xac/0x1c8 __flush_workqueue+0xaa/0x4f0 drain_workqueue+0xaa/0x158 destroy_workqueue+0x44/0x2d8 smc_lgr_free+0x9e/0xf8 [smc] process_one_work+0x30c/0x730 worker_thread+0x62/0x420 kthread+0x138/0x150 __ret_from_fork+0x3c/0x58 ret_from_fork+0xa/0x40 other info that might help us debug this: Chain exists of: (wq_completion)smc_tx_wq-00000000#2 --> smc_client_lgr_pending --> (work_completion)(&(&lgr->free_work)->work) Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&(&lgr->free_work)->work)); lock(smc_client_lgr_pending); lock((work_completion) (&(&lgr->free_work)->work)); lock((wq_completion)smc_tx_wq-00000000#2); *** DEADLOCK *** 2 locks held by kworker/3:0/176251: #0: 0000000080183548 ((wq_completion)events){+.+.}-{0:0}, at: process_one_work+0x232/0x730 #1: 0000037fffe97dc8 ((work_completion) (&(&lgr->free_work)->work)){+.+.}-{0:0}, at: process_one_work+0x232/0x730 stack backtrace: CPU: 3 PID: 176251 Comm: kworker/3:0 Not tainted Hardware name: IBM 8561 T01 701 (z/VM 7.2.0) Call Trace: [<000000002983c3e4>] dump_stack_lvl+0xac/0x100 [<0000000028b477ae>] check_noncircular+0x13e/0x160 [<0000000028b48808>] check_prev_add+0xd8/0xe88 [<0000000028b49cc4>] validate_chain+0x70c/0xb20 [<0000000028b4bd26>] __lock_acquire+0x58e/0xbd8 [<0000000028b4cf6a>] lock_acquire.part.0+0xe2/0x248 [<0000000028b4d17c>] lock_acquire+0xac/0x1c8 [<0000000028addaaa>] __flush_workqueue+0xaa/0x4f0 [<0000000028addf9a>] drain_workqueue+0xaa/0x158 [<0000000028ae303c>] destroy_workqueue+0x44/0x2d8 [<000003ff8029af26>] smc_lgr_free+0x9e/0xf8 [smc] [<0000000028adf3d4>] process_one_work+0x30c/0x730 [<0000000028adf85a>] worker_thread+0x62/0x420 [<0000000028aeac50>] kthread+0x138/0x150 [<0000000028a63914>] __ret_from_fork+0x3c/0x58 [<00000000298503da>] ret_from_fork+0xa/0x40 INFO: lockdep is turned off. =================================================================== This deadlock occurs because cancel_delayed_work_sync() waits for the work(&lgr->free_work) to finish, while the &lgr->free_work waits for the work(lgr->tx_wq), which needs the sk_lock-AF_SMC, that is already used under the mutex_lock. The solution is to use cancel_delayed_work() instead, which kills off a pending work. Fixes: a52bcc919b14 ("net/smc: improve termination processing") Signed-off-by: Wenjia Zhang Reviewed-by: Jan Karcher Reviewed-by: Karsten Graul Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d52060b2680c..454356771cda 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1464,7 +1464,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) if (lgr->terminating) return; /* lgr already terminating */ /* cancel free_work sync, will terminate when lgr->freeing is set */ - cancel_delayed_work_sync(&lgr->free_work); + cancel_delayed_work(&lgr->free_work); lgr->terminating = 1; /* kill remaining link group connections */ -- cgit From 9d876d3ef27fa84355597ad269939772192356d8 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 13 Mar 2023 11:08:29 +0100 Subject: net/smc: Fix device de-init sequence CLC message initialization was not properly reversed in error handling path. Reported-and-suggested-by: Alexander Gordeev Signed-off-by: Stefan Raspl Signed-off-by: Wenjia Zhang Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/af_smc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ff6dd86bdc9f..c6b4a62276f6 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3501,6 +3501,7 @@ out_pnet: out_nl: smc_nl_exit(); out_ism: + smc_clc_exit(); smc_ism_exit(); out_pernet_subsys_stat: unregister_pernet_subsys(&smc_net_stat_ops); -- cgit From d8b228318935044dafe3a5bc07ee71a1f1424b8d Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Mon, 13 Mar 2023 23:00:45 +0100 Subject: net: usb: smsc75xx: Limit packet length to skb->len Packet length retrieved from skb data may be larger than the actual socket buffer length (up to 9026 bytes). In such case the cloned skb passed up the network stack will leak kernel memory contents. Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver") Signed-off-by: Szymon Heidrich Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 95de452ff4da..db34f8d1d605 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -2212,7 +2212,8 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) dev->net->stats.rx_frame_errors++; } else { /* MAX_SINGLE_PACKET_SIZE + 4(CRC) + 2(COE) + 4(Vlan) */ - if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12))) { + if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12) || + size > skb->len)) { netif_dbg(dev, rx_err, dev->net, "size err rx_cmd_a=0x%08x\n", rx_cmd_a); -- cgit From 611e2dabb4b3243d176739fd6a5a34d007fa3f86 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 14 Mar 2023 00:34:26 +0000 Subject: net: ethernet: mtk_eth_soc: reset PCS state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reset the internal PCS state machine when changing interface mode. This prevents confusing the state machine when changing interface modes, e.g. from SGMII to 2500Base-X or vice-versa. Fixes: 7e538372694b ("net: ethernet: mediatek: Re-add support SGMII") Reviewed-by: Russell King (Oracle) Tested-by: Bjørn Mork Signed-off-by: Daniel Golle Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 4 ++++ drivers/net/ethernet/mediatek/mtk_sgmii.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index b65de174c3d9..084a6badef6d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -542,6 +542,10 @@ #define SGMII_SEND_AN_ERROR_EN BIT(11) #define SGMII_IF_MODE_MASK GENMASK(5, 1) +/* Register to reset SGMII design */ +#define SGMII_RESERVED_0 0x34 +#define SGMII_SW_RESET BIT(0) + /* Register to set SGMII speed, ANA RG_ Control Signals III*/ #define SGMSYS_ANA_RG_CS3 0x2028 #define RG_PHY_SPEED_MASK (BIT(2) | BIT(3)) diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c index bb00de1003ac..612f65bb0345 100644 --- a/drivers/net/ethernet/mediatek/mtk_sgmii.c +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -88,6 +88,10 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode, regmap_update_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL, SGMII_PHYA_PWD, SGMII_PHYA_PWD); + /* Reset SGMII PCS state */ + regmap_update_bits(mpcs->regmap, SGMII_RESERVED_0, + SGMII_SW_RESET, SGMII_SW_RESET); + if (interface == PHY_INTERFACE_MODE_2500BASEX) rgc3 = RG_PHY_SPEED_3_125G; else -- cgit From 6e933a804c7db8be64f367f33e63cd7dcc302ebb Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 14 Mar 2023 00:34:45 +0000 Subject: net: ethernet: mtk_eth_soc: only write values if needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only restart auto-negotiation and write link timer if actually necessary. This prevents losing the link in case of minor changes. Fixes: 7e538372694b ("net: ethernet: mediatek: Re-add support SGMII") Reviewed-by: Russell King (Oracle) Tested-by: Bjørn Mork Signed-off-by: Daniel Golle Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_sgmii.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c index 612f65bb0345..83976dc86887 100644 --- a/drivers/net/ethernet/mediatek/mtk_sgmii.c +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -38,20 +38,16 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode, const unsigned long *advertising, bool permit_pause_to_mac) { + bool mode_changed = false, changed, use_an; struct mtk_pcs *mpcs = pcs_to_mtk_pcs(pcs); unsigned int rgc3, sgm_mode, bmcr; int advertise, link_timer; - bool changed, use_an; advertise = phylink_mii_c22_pcs_encode_advertisement(interface, advertising); if (advertise < 0) return advertise; - link_timer = phylink_get_link_timer_ns(interface); - if (link_timer < 0) - return link_timer; - /* Clearing IF_MODE_BIT0 switches the PCS to BASE-X mode, and * we assume that fixes it's speed at bitrate = line rate (in * other words, 1000Mbps or 2500Mbps). @@ -77,13 +73,16 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode, } if (use_an) { - /* FIXME: Do we need to set AN_RESTART here? */ - bmcr = SGMII_AN_RESTART | SGMII_AN_ENABLE; + bmcr = SGMII_AN_ENABLE; } else { bmcr = 0; } if (mpcs->interface != interface) { + link_timer = phylink_get_link_timer_ns(interface); + if (link_timer < 0) + return link_timer; + /* PHYA power down */ regmap_update_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL, SGMII_PHYA_PWD, SGMII_PHYA_PWD); @@ -101,16 +100,17 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode, regmap_update_bits(mpcs->regmap, mpcs->ana_rgc3, RG_PHY_SPEED_3_125G, rgc3); + /* Setup the link timer */ + regmap_write(mpcs->regmap, SGMSYS_PCS_LINK_TIMER, link_timer / 2 / 8); + mpcs->interface = interface; + mode_changed = true; } /* Update the advertisement, noting whether it has changed */ regmap_update_bits_check(mpcs->regmap, SGMSYS_PCS_ADVERTISE, SGMII_ADVERTISE, advertise, &changed); - /* Setup the link timer and QPHY power up inside SGMIISYS */ - regmap_write(mpcs->regmap, SGMSYS_PCS_LINK_TIMER, link_timer / 2 / 8); - /* Update the sgmsys mode register */ regmap_update_bits(mpcs->regmap, SGMSYS_SGMII_MODE, SGMII_REMOTE_FAULT_DIS | SGMII_SPEED_DUPLEX_AN | @@ -118,7 +118,7 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode, /* Update the BMCR */ regmap_update_bits(mpcs->regmap, SGMSYS_PCS_CONTROL_1, - SGMII_AN_RESTART | SGMII_AN_ENABLE, bmcr); + SGMII_AN_ENABLE, bmcr); /* Release PHYA power down state * Only removing bit SGMII_PHYA_PWD isn't enough. @@ -132,7 +132,7 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode, usleep_range(50, 100); regmap_write(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL, 0); - return changed; + return changed || mode_changed; } static void mtk_pcs_restart_an(struct phylink_pcs *pcs) -- cgit From 0d3c9333d976af41d7dbc6bf4d9d2e95fbdf9c89 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Tue, 14 Mar 2023 13:50:35 +0800 Subject: drm/bridge: Fix returned array size name for atomic_get_input_bus_fmts kdoc The returned array size for input formats is set through atomic_get_input_bus_fmts()'s 'num_input_fmts' argument, so use 'num_input_fmts' to represent the array size in the function's kdoc, not 'num_output_fmts'. Fixes: 91ea83306bfa ("drm/bridge: Fix the bridge kernel doc") Fixes: f32df58acc68 ("drm/bridge: Add the necessary bits to support bus format negotiation") Signed-off-by: Liu Ying Reviewed-by: Robert Foss Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230314055035.3731179-1-victor.liu@nxp.com --- include/drm/drm_bridge.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 42f86327b40a..bf964cdfb330 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -423,11 +423,11 @@ struct drm_bridge_funcs { * * The returned array must be allocated with kmalloc() and will be * freed by the caller. If the allocation fails, NULL should be - * returned. num_output_fmts must be set to the returned array size. + * returned. num_input_fmts must be set to the returned array size. * Formats listed in the returned array should be listed in decreasing * preference order (the core will try all formats until it finds one * that works). When the format is not supported NULL should be - * returned and num_output_fmts should be set to 0. + * returned and num_input_fmts should be set to 0. * * This method is called on all elements of the bridge chain as part of * the bus format negotiation process that happens in -- cgit From 4028cbf867f70a3c599c9b0c9509334c56ed97d7 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 9 Mar 2023 16:24:46 +0100 Subject: drm/meson: dw-hdmi: Fix devm_regulator_*get_enable*() conversion again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_regulator_get_enable_optional() returns -ENODEV if requested optional regulator is not present. Adjust code for that, because in the 67d0a30128c9 I've incorrectly assumed that it also returns 0 when regulator is not present. Reported-by: Ricardo Cañuelo Fixes: 67d0a30128c9 ("drm/meson: dw-hdmi: Fix devm_regulator_*get_enable*() conversion") Signed-off-by: Marek Szyprowski Acked-by: Martin Blumenstingl Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230309152446.104913-1-m.szyprowski@samsung.com --- drivers/gpu/drm/meson/meson_dw_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index 534621a13a34..3d046878ce6c 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -718,7 +718,7 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master, dw_plat_data = &meson_dw_hdmi->dw_plat_data; ret = devm_regulator_get_enable_optional(dev, "hdmi"); - if (ret < 0) + if (ret < 0 && ret != -ENODEV) return ret; meson_dw_hdmi->hdmitx_apb = devm_reset_control_get_exclusive(dev, -- cgit From f2c7e3562b4c4f1699acc1538ebf3e75f5cced35 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Mar 2023 16:08:34 +1100 Subject: powerpc/mm: Fix false detection of read faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support detection of read faults with Radix execute-only memory, the vma_is_accessible() check in access_error() (which checks for PROT_NONE) was replaced with a check to see if VM_READ was missing, and if so, returns true to assert the fault was caused by a bad read. This is incorrect, as it ignores that both VM_WRITE and VM_EXEC imply read on powerpc, as defined in protection_map[]. This causes mappings containing VM_WRITE or VM_EXEC without VM_READ to misreport the cause of page faults, since the MMU is still allowing reads. Correct this by restoring the original vma_is_accessible() check for PROT_NONE mappings, and adding a separate check for Radix PROT_EXEC-only mappings. Fixes: 395cac7752b9 ("powerpc/mm: Support execute-only memory on the Radix MMU") Reported-by: Michal Suchánek Link: https://lore.kernel.org/r/20230308152702.GR19419@kitsune.suse.cz Tested-by: Benjamin Gray Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://msgid.link/20230310050834.63105-1-ruscur@russell.cc --- arch/powerpc/mm/fault.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2bef19cc1b98..af46aa88422b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -271,11 +271,16 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma } /* - * Check for a read fault. This could be caused by a read on an - * inaccessible page (i.e. PROT_NONE), or a Radix MMU execute-only page. + * VM_READ, VM_WRITE and VM_EXEC all imply read permissions, as + * defined in protection_map[]. Read faults can only be caused by + * a PROT_NONE mapping, or with a PROT_EXEC-only mapping on Radix. */ - if (unlikely(!(vma->vm_flags & VM_READ))) + if (unlikely(!vma_is_accessible(vma))) return true; + + if (unlikely(radix_enabled() && ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC))) + return true; + /* * We should ideally do the vma pkey access check here. But in the * fault path, handle_mm_fault() also does the same check. To avoid -- cgit From 139f6973bf140c65d4d1d4bde5485badb4454d7a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 12 Mar 2023 14:25:23 +0100 Subject: wifi: mwifiex: mark OF related data as maybe unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver can be compile tested with !CONFIG_OF making certain data unused: drivers/net/wireless/marvell/mwifiex/sdio.c:498:34: error: ‘mwifiex_sdio_of_match_table’ defined but not used [-Werror=unused-const-variable=] drivers/net/wireless/marvell/mwifiex/pcie.c:175:34: error: ‘mwifiex_pcie_of_match_table’ defined but not used [-Werror=unused-const-variable=] Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230312132523.352182-1-krzysztof.kozlowski@linaro.org --- drivers/net/wireless/marvell/mwifiex/pcie.c | 2 +- drivers/net/wireless/marvell/mwifiex/sdio.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 5dcf61761a16..9a698a16a8f3 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -172,7 +172,7 @@ static const struct mwifiex_pcie_device mwifiex_pcie8997 = { .can_ext_scan = true, }; -static const struct of_device_id mwifiex_pcie_of_match_table[] = { +static const struct of_device_id mwifiex_pcie_of_match_table[] __maybe_unused = { { .compatible = "pci11ab,2b42" }, { .compatible = "pci1b4b,2b42" }, { } diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index c64e24c10ea6..a24bd40dd41a 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -495,7 +495,7 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = { {"EXTLAST", NULL, 0, 0xFE}, }; -static const struct of_device_id mwifiex_sdio_of_match_table[] = { +static const struct of_device_id mwifiex_sdio_of_match_table[] __maybe_unused = { { .compatible = "marvell,sd8787" }, { .compatible = "marvell,sd8897" }, { .compatible = "marvell,sd8978" }, -- cgit From 007ae9b268ba7553e479608cf9735d3c4672a2ab Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Tue, 14 Mar 2023 22:11:22 +0100 Subject: wifi: mac80211: Serialize ieee80211_handle_wake_tx_queue() ieee80211_handle_wake_tx_queue must not run concurrent multiple times. It calls ieee80211_txq_schedule_start() and the drivers migrated to iTXQ do not expect overlapping drv_tx() calls. This fixes 'c850e31f79f0 ("wifi: mac80211: add internal handler for wake_tx_queue")', which introduced ieee80211_handle_wake_tx_queue. Drivers started to use it with 'a790cc3a4fad ("wifi: mac80211: add wake_tx_queue callback to drivers")'. But only after fixing an independent bug with '4444bc2116ae ("wifi: mac80211: Proper mark iTXQs for resumption")' problematic concurrent calls really happened and exposed the initial issue. Fixes: c850e31f79f0 ("wifi: mac80211: add internal handler for wake_tx_queue") Reported-by: Thomas Mann Link: https://bugzilla.kernel.org/show_bug.cgi?id=217119 Link: https://lore.kernel.org/r/b8efebc6-4399-d0b8-b2a0-66843314616b@leemhuis.info/ Link: https://lore.kernel.org/r/b7445607128a6b9ed7c17fcdcf3679bfaf4aaea.camel@sipsolutions.net> CC: Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20230314211122.111688-1-alexander@wetzel-home.de [add missing spin_lock_init() noticed by Felix] Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/main.c | 2 ++ net/mac80211/util.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ecc232eb1ee8..e082582e0aa2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1284,6 +1284,9 @@ struct ieee80211_local { struct list_head active_txqs[IEEE80211_NUM_ACS]; u16 schedule_round[IEEE80211_NUM_ACS]; + /* serializes ieee80211_handle_wake_tx_queue */ + spinlock_t handle_wake_tx_queue_lock; + u16 airtime_flags; u32 aql_txq_limit_low[IEEE80211_NUM_ACS]; u32 aql_txq_limit_high[IEEE80211_NUM_ACS]; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 846528850612..ddf2b7811c55 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -802,6 +802,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, local->aql_threshold = IEEE80211_AQL_THRESHOLD; atomic_set(&local->aql_total_pending_airtime, 0); + spin_lock_init(&local->handle_wake_tx_queue_lock); + INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1a28fe5cb614..3aceb3b731bf 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -314,6 +314,8 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif); struct ieee80211_txq *queue; + spin_lock(&local->handle_wake_tx_queue_lock); + /* Use ieee80211_next_txq() for airtime fairness accounting */ ieee80211_txq_schedule_start(hw, txq->ac); while ((queue = ieee80211_next_txq(hw, txq->ac))) { @@ -321,6 +323,7 @@ void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, ieee80211_return_txq(hw, queue, false); } ieee80211_txq_schedule_end(hw, txq->ac); + spin_unlock(&local->handle_wake_tx_queue_lock); } EXPORT_SYMBOL(ieee80211_handle_wake_tx_queue); -- cgit From 63f886597085f346276e3b3c8974de0100d65f32 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 14 Mar 2023 13:11:05 +0900 Subject: block: null_blk: Fix handling of fake timeout request When injecting a fake timeout into the null_blk driver using fail_io_timeout, the request timeout handler does not execute blk_mq_complete_request(), so the complete callback is never executed for a timedout request. The null_blk driver also has a driver-specific fake timeout mechanism which does not have this problem. Fix the problem with fail_io_timeout by using the same meachanism as null_blk internal timeout feature, using the fake_timeout field of null_blk commands. Reported-by: Akinobu Mita Fixes: de3510e52b0a ("null_blk: fix command timeout completion handling") Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20230314041106.19173-2-damien.lemoal@opensource.wdc.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 4c601ca9552a..7d95ad203c97 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1413,8 +1413,7 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd) case NULL_IRQ_SOFTIRQ: switch (cmd->nq->dev->queue_mode) { case NULL_Q_MQ: - if (likely(!blk_should_fake_timeout(cmd->rq->q))) - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(cmd->rq); break; case NULL_Q_BIO: /* @@ -1675,7 +1674,8 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, cmd->rq = bd->rq; cmd->error = BLK_STS_OK; cmd->nq = nq; - cmd->fake_timeout = should_timeout_request(bd->rq); + cmd->fake_timeout = should_timeout_request(bd->rq) || + blk_should_fake_timeout(bd->rq->q); blk_mq_start_request(bd->rq); -- cgit From b6402014cab0481bdfd1ffff3e1dad714e8e1205 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 14 Mar 2023 13:11:06 +0900 Subject: block: null_blk: cleanup null_queue_rq() Use a local struct request pointer variable to avoid having to dereference struct blk_mq_queue_data multiple times. While at it, also fix the function argument indentation and remove a useless "else" after a return. Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20230314041106.19173-2-damien.lemoal@opensource.wdc.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 7d95ad203c97..9e6b032c8ecc 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1657,12 +1657,13 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq) } static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) + const struct blk_mq_queue_data *bd) { - struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + struct request *rq = bd->rq; + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); struct nullb_queue *nq = hctx->driver_data; - sector_t nr_sectors = blk_rq_sectors(bd->rq); - sector_t sector = blk_rq_pos(bd->rq); + sector_t nr_sectors = blk_rq_sectors(rq); + sector_t sector = blk_rq_pos(rq); const bool is_poll = hctx->type == HCTX_TYPE_POLL; might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); @@ -1671,15 +1672,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; } - cmd->rq = bd->rq; + cmd->rq = rq; cmd->error = BLK_STS_OK; cmd->nq = nq; - cmd->fake_timeout = should_timeout_request(bd->rq) || - blk_should_fake_timeout(bd->rq->q); + cmd->fake_timeout = should_timeout_request(rq) || + blk_should_fake_timeout(rq->q); - blk_mq_start_request(bd->rq); + blk_mq_start_request(rq); - if (should_requeue_request(bd->rq)) { + if (should_requeue_request(rq)) { /* * Alternate between hitting the core BUSY path, and the * driver driven requeue path @@ -1687,22 +1688,20 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, nq->requeue_selection++; if (nq->requeue_selection & 1) return BLK_STS_RESOURCE; - else { - blk_mq_requeue_request(bd->rq, true); - return BLK_STS_OK; - } + blk_mq_requeue_request(rq, true); + return BLK_STS_OK; } if (is_poll) { spin_lock(&nq->poll_lock); - list_add_tail(&bd->rq->queuelist, &nq->poll_list); + list_add_tail(&rq->queuelist, &nq->poll_list); spin_unlock(&nq->poll_lock); return BLK_STS_OK; } if (cmd->fake_timeout) return BLK_STS_OK; - return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq)); + return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq)); } static void cleanup_queue(struct nullb_queue *nq) -- cgit From a5fc1441af7719e93dc7a638a960befb694ade89 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 14 Mar 2023 19:33:32 +0100 Subject: io_uring/sqpoll: Do not set PF_NO_SETAFFINITY on sqpoll threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users may specify a CPU where the sqpoll thread would run. This may conflict with cpuset operations because of strict PF_NO_SETAFFINITY requirement. That flag is unnecessary for polling "kernel" threads, see the reasoning in commit 01e68ce08a30 ("io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers"). Drop the flag on poll threads too. Fixes: 01e68ce08a30 ("io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers") Link: https://lore.kernel.org/all/20230314162559.pnyxdllzgw7jozgx@blackpad/ Signed-off-by: Michal Koutný Link: https://lore.kernel.org/r/20230314183332.25834-1-mkoutny@suse.com Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 1 - 1 file changed, 1 deletion(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 0119d3f1a556..9db4bc1f521a 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -233,7 +233,6 @@ static int io_sq_thread(void *data) set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); else set_cpus_allowed_ptr(current, cpu_online_mask); - current->flags |= PF_NO_SETAFFINITY; mutex_lock(&sqd->lock); while (1) { -- cgit From 54686b611eb054a5c84976e6f6e03788fa8e6a38 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 8 Mar 2023 15:41:32 +0100 Subject: MAINTAINERS: repair malformed T: entries in NVM EXPRESS DRIVERS The T: entries shall be composed of a SCM tree type (git, hg, quilt, stgit or topgit) and location. Add the SCM tree type to the T: entry, and reorder the file entries in alphabetical order. Fixes: b508fc354f6d ("nvme: update maintainers information") Signed-off-by: Lukas Bulwahn Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f305..1296426b3879 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14872,12 +14872,12 @@ M: Sagi Grimberg L: linux-nvme@lists.infradead.org S: Supported W: http://git.infradead.org/nvme.git -T: git://git.infradead.org/nvme.git +T: git git://git.infradead.org/nvme.git F: Documentation/nvme/ -F: drivers/nvme/host/ F: drivers/nvme/common/ -F: include/linux/nvme.h +F: drivers/nvme/host/ F: include/linux/nvme-*.h +F: include/linux/nvme.h F: include/uapi/linux/nvme_ioctl.h NVM EXPRESS FABRICS AUTHENTICATION @@ -14912,7 +14912,7 @@ M: Chaitanya Kulkarni L: linux-nvme@lists.infradead.org S: Supported W: http://git.infradead.org/nvme.git -T: git://git.infradead.org/nvme.git +T: git git://git.infradead.org/nvme.git F: drivers/nvme/target/ NVMEM FRAMEWORK -- cgit From 37f0dc2ec78af0c3f35dd05578763de059f6fe77 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 4 Mar 2023 07:13:45 +0800 Subject: nvme: fix handling single range discard request When investigating one customer report on warning in nvme_setup_discard, we observed the controller(nvme/tcp) actually exposes queue_max_discard_segments(req->q) == 1. Obviously the current code can't handle this situation, since contiguity merge like normal RW request is taken. Fix the issue by building range from request sector/nr_sectors directly. Fixes: b35ba01ea697 ("nvme: support ranged discard requests") Signed-off-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c2730b116dc6..d4be525f8100 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -781,16 +781,26 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, range = page_address(ns->ctrl->discard_page); } - __rq_for_each_bio(bio, req) { - u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); - u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; - - if (n < segments) { - range[n].cattr = cpu_to_le32(0); - range[n].nlb = cpu_to_le32(nlb); - range[n].slba = cpu_to_le64(slba); + if (queue_max_discard_segments(req->q) == 1) { + u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req)); + u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9); + + range[0].cattr = cpu_to_le32(0); + range[0].nlb = cpu_to_le32(nlb); + range[0].slba = cpu_to_le64(slba); + n = 1; + } else { + __rq_for_each_bio(bio, req) { + u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); + u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; + + if (n < segments) { + range[n].cattr = cpu_to_le32(0); + range[n].nlb = cpu_to_le32(nlb); + range[n].slba = cpu_to_le64(slba); + } + n++; } - n++; } if (WARN_ON_ONCE(n != segments)) { -- cgit From a61d265533b7fe0026a02a49916aa564ffe38e4c Mon Sep 17 00:00:00 2001 From: Irvin Cote Date: Wed, 8 Mar 2023 18:05:08 -0300 Subject: nvme-pci: fixing memory leak in probe teardown path In case the nvme_probe teardown path is triggered the ctrl ref count does not reach 0 thus creating a memory leak upon failure of nvme_probe. Signed-off-by: Irvin Cote Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5b95c94ee40f..e77a8a873b1a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3073,6 +3073,7 @@ out_dev_unmap: nvme_dev_unmap(dev); out_uninit_ctrl: nvme_uninit_ctrl(&dev->ctrl); + nvme_put_ctrl(&dev->ctrl); return result; } -- cgit From 9630d80655bfe7e62e4aff2889dc4eae7ceeb887 Mon Sep 17 00:00:00 2001 From: Elmer Miroslav Mosher Golovin Date: Wed, 8 Mar 2023 19:19:29 +0300 Subject: nvme-pci: add NVME_QUIRK_BOGUS_NID for Netac NV3000 Added a quirk to fix the Netac NV3000 SSD reporting duplicate NGUIDs. Cc: Signed-off-by: Elmer Miroslav Mosher Golovin Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e77a8a873b1a..8a536d5300a4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3416,6 +3416,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1f40, 0x1202), /* Netac Technologies Co. NV3000 NVMe SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. NV7000 NVMe SSD */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */ -- cgit From b65d44fa0fe072c91bf41cd8756baa2b4c77eff2 Mon Sep 17 00:00:00 2001 From: Philipp Geulen Date: Mon, 13 Mar 2023 11:11:50 +0100 Subject: nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM620 Added a quirk to fix Lexar NM620 1TB SSD reporting duplicate NGUIDs. Signed-off-by: Philipp Geulen Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8a536d5300a4..b615906263f3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3438,6 +3438,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), -- cgit From a3406352c54fbc476f4f6b98159c3ea1c7dbb6fc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2023 10:56:22 +0200 Subject: nvme-tcp: fix opcode reporting in the timeout handler For non in-capsule writes we reuse the request pdu space for a h2cdata pdu in order to avoid over allocating space (either preallocate or dynamically upon receving an r2t pdu). However if the request times out the core expects to find the opcode in the start of the request, which we override. In order to prevent that, without sacrificing additional 24 bytes per request, we just use the tail of the command pdu space instead (last 24 bytes from the 72 bytes command pdu). That should make the command opcode always available, and we get away from allocating more space. If in the future we would need the last 24 bytes of the nvme command available we would need to allocate a dedicated space for it in the request, but until then we can avoid doing so. Reported-by: Akinobu Mita Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Tested-by: Akinobu Mita Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 7723a4989524..2e174fad57d7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -208,6 +208,18 @@ static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue) return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; } +static inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req) +{ + return req->pdu; +} + +static inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req) +{ + /* use the pdu space in the back for the data pdu */ + return req->pdu + sizeof(struct nvme_tcp_cmd_pdu) - + sizeof(struct nvme_tcp_data_pdu); +} + static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req) { if (nvme_is_fabrics(req->req.cmd)) @@ -614,7 +626,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req) { - struct nvme_tcp_data_pdu *data = req->pdu; + struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req); struct nvme_tcp_queue *queue = req->queue; struct request *rq = blk_mq_rq_from_pdu(req); u32 h2cdata_sent = req->pdu_len; @@ -1038,7 +1050,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; - struct nvme_tcp_cmd_pdu *pdu = req->pdu; + struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); bool inline_data = nvme_tcp_has_inline_data(req); u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) + hdgst - req->offset; @@ -1077,7 +1089,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; - struct nvme_tcp_data_pdu *pdu = req->pdu; + struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req); u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) - req->offset + hdgst; int ret; @@ -2284,7 +2296,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) { struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; - struct nvme_tcp_cmd_pdu *pdu = req->pdu; + struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; int qid = nvme_tcp_queue_id(req->queue); @@ -2323,7 +2335,7 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue, struct request *rq) { struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_tcp_cmd_pdu *pdu = req->pdu; + struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); struct nvme_command *c = &pdu->cmd; c->common.flags |= NVME_CMD_SGL_METABUF; @@ -2343,7 +2355,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, struct request *rq) { struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); - struct nvme_tcp_cmd_pdu *pdu = req->pdu; + struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); struct nvme_tcp_queue *queue = req->queue; u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0; blk_status_t ret; -- cgit From 7e87965d3807ab1f518ef2365f91d5ba6b0c5abe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2023 10:56:23 +0200 Subject: nvme-tcp: add nvme-tcp pdu size build protection Make sure that we don't somehow mess up the wire structures in the spec. Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 2e174fad57d7..42c0598c31f2 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2694,6 +2694,15 @@ static struct nvmf_transport_ops nvme_tcp_transport = { static int __init nvme_tcp_init_module(void) { + BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8); + BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72); + BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24); + BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) != 24); + BUILD_BUG_ON(sizeof(struct nvme_tcp_r2t_pdu) != 24); + BUILD_BUG_ON(sizeof(struct nvme_tcp_icreq_pdu) != 128); + BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128); + BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24); + nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); if (!nvme_tcp_wq) -- cgit From 8e19b87cfce2de2125f11363d7dea3d08f16ccae Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 9 Mar 2023 23:31:18 +0900 Subject: nvme-trace: show more opcode names We have more commands to show in the trace. Sync up. Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fad4aa245fb..779507ac750b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -812,6 +812,7 @@ enum nvme_opcode { nvme_opcode_name(nvme_cmd_compare), \ nvme_opcode_name(nvme_cmd_write_zeroes), \ nvme_opcode_name(nvme_cmd_dsm), \ + nvme_opcode_name(nvme_cmd_verify), \ nvme_opcode_name(nvme_cmd_resv_register), \ nvme_opcode_name(nvme_cmd_resv_report), \ nvme_opcode_name(nvme_cmd_resv_acquire), \ @@ -1144,10 +1145,14 @@ enum nvme_admin_opcode { nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ nvme_admin_opcode_name(nvme_admin_activate_fw), \ nvme_admin_opcode_name(nvme_admin_download_fw), \ + nvme_admin_opcode_name(nvme_admin_dev_self_test), \ nvme_admin_opcode_name(nvme_admin_ns_attach), \ nvme_admin_opcode_name(nvme_admin_keep_alive), \ nvme_admin_opcode_name(nvme_admin_directive_send), \ nvme_admin_opcode_name(nvme_admin_directive_recv), \ + nvme_admin_opcode_name(nvme_admin_virtual_mgmt), \ + nvme_admin_opcode_name(nvme_admin_nvme_mi_send), \ + nvme_admin_opcode_name(nvme_admin_nvme_mi_recv), \ nvme_admin_opcode_name(nvme_admin_dbbuf), \ nvme_admin_opcode_name(nvme_admin_format_nvm), \ nvme_admin_opcode_name(nvme_admin_security_send), \ -- cgit From 6173a77b7e9d3e202bdb9897b23f2a8afe7bf286 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 6 Mar 2023 10:13:13 +0900 Subject: nvmet: avoid potential UAF in nvmet_req_complete() An nvme target ->queue_response() operation implementation may free the request passed as argument. Such implementation potentially could result in a use after free of the request pointer when percpu_ref_put() is called in nvmet_req_complete(). Avoid such problem by using a local variable to save the sq pointer before calling __nvmet_req_complete(), thus avoiding dereferencing the req pointer after that function call. Fixes: a07b4970f464 ("nvmet: add a generic NVMe target") Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index f66ed13d7c11..3935165048e7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -756,8 +756,10 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status) void nvmet_req_complete(struct nvmet_req *req, u16 status) { + struct nvmet_sq *sq = req->sq; + __nvmet_req_complete(req, status); - percpu_ref_put(&req->sq->ref); + percpu_ref_put(&sq->ref); } EXPORT_SYMBOL_GPL(nvmet_req_complete); -- cgit From 6030363199e3a6341afb467ddddbed56640cbf6a Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 15 Mar 2023 14:20:32 +0800 Subject: block: sunvdc: add check for mdesc_grab() returning NULL In vdc_port_probe(), we should check the return value of mdesc_grab() as it may return NULL, which can cause potential NPD bug. Fixes: 43fdf27470b2 ("[SPARC64]: Abstract out mdesc accesses for better MD update handling.") Signed-off-by: Liang He Link: https://lore.kernel.org/r/20230315062032.1741692-1-windhl@126.com [axboe: style cleanup] Signed-off-by: Jens Axboe --- drivers/block/sunvdc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index fb855da971ee..9fa821fa76b0 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -972,6 +972,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) print_version(); hp = mdesc_grab(); + if (!hp) + return -ENODEV; err = -ENODEV; if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) { -- cgit From 5f27571382ca42daa3e3d40d1b252bf18c2b61d2 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 23 Feb 2023 17:12:26 +0800 Subject: block: count 'ios' and 'sectors' when io is done for bio-based device While using iostat for raid, I observed very strange 'await' occasionally, and turns out it's due to that 'ios' and 'sectors' is counted in bdev_start_io_acct(), while 'nsecs' is counted in bdev_end_io_acct(). I'm not sure why they are ccounted like that but I think this behaviour is obviously wrong because user will get wrong disk stats. Fix the problem by counting 'ios' and 'sectors' when io is done, like what rq-based device does. Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function") Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230223091226.1135678-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-core.c | 16 ++++++---------- drivers/md/dm.c | 6 +++--- drivers/nvme/host/multipath.c | 8 ++++---- include/linux/blkdev.h | 5 ++--- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 9e5e0277a4d9..42926e6cb83c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -959,16 +959,11 @@ again: } } -unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, enum req_op op, +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time) { - const int sgrp = op_stat_group(op); - part_stat_lock(); update_io_ticks(bdev, start_time, false); - part_stat_inc(bdev, ios[sgrp]); - part_stat_add(bdev, sectors[sgrp], sectors); part_stat_local_inc(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -984,13 +979,12 @@ EXPORT_SYMBOL(bdev_start_io_acct); */ unsigned long bio_start_io_acct(struct bio *bio) { - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), - bio_op(bio), jiffies); + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); void bdev_end_io_acct(struct block_device *bdev, enum req_op op, - unsigned long start_time) + unsigned int sectors, unsigned long start_time) { const int sgrp = op_stat_group(op); unsigned long now = READ_ONCE(jiffies); @@ -998,6 +992,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op, part_stat_lock(); update_io_ticks(bdev, now, true); + part_stat_inc(bdev, ios[sgrp]); + part_stat_add(bdev, sectors[sgrp], sectors); part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); part_stat_local_dec(bdev, in_flight[op_is_write(op)]); part_stat_unlock(); @@ -1007,7 +1003,7 @@ EXPORT_SYMBOL(bdev_end_io_acct); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev) { - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time); } EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index eace45a18d45..f5cc330bb549 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end) sectors = io->sectors; if (!end) - bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio), - start_time); + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time); else - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors, + start_time); if (static_branch_unlikely(&stats_enabled) && unlikely(dm_stats_used(&md->stats))) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index fc39d01e7b63..9171452e2f6d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq) return; nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; - nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, - blk_rq_bytes(rq) >> SECTOR_SHIFT, - req_op(rq), jiffies); + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq), + jiffies); } EXPORT_SYMBOL_GPL(nvme_mpath_start_request); @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq) if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) return; bdev_end_io_acct(ns->head->disk->part0, req_op(rq), - nvme_req(rq)->start_time); + blk_rq_bytes(rq) >> SECTOR_SHIFT, + nvme_req(rq)->start_time); } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d1aee08f8c18..941304f17492 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } -unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, enum req_op op, +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); void bdev_end_io_acct(struct block_device *bdev, enum req_op op, - unsigned long start_time); + unsigned int sectors, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, -- cgit From 6c0f5898836c05c6d850a750ed7940ba29e4e6c5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 13 Mar 2023 13:29:17 -0700 Subject: md: select BLOCK_LEGACY_AUTOLOAD When BLOCK_LEGACY_AUTOLOAD is not enable, mdadm is not able to activate new arrays unless "CREATE names=yes" appears in mdadm.conf As this is a regression we need to always enable BLOCK_LEGACY_AUTOLOAD for when MD is selected - at least until mdadm is updated and the updates widely available. Cc: stable@vger.kernel.org # v5.18+ Fixes: fbdee71bb5d8 ("block: deprecate autoloading based on dev_t") Signed-off-by: NeilBrown Signed-off-by: Song Liu --- drivers/md/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 998a5cfdbc4e..662d219c39bf 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -16,6 +16,10 @@ if MD config BLK_DEV_MD tristate "RAID support" select BLOCK_HOLDER_DEPRECATED if SYSFS + # BLOCK_LEGACY_AUTOLOAD requirement should be removed + # after relevant mdadm enhancements - to make "names=yes" + # the default - are widely available. + select BLOCK_LEGACY_AUTOLOAD help This driver lets you combine several hard disk partitions into one logical block device. This can be used to simply append one -- cgit From 1c3ab6dfa0692c3626580a508cf84e794201b357 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 2 Mar 2023 09:54:12 +0800 Subject: btrfs: handle missing chunk mapping more gracefully [BUG] During my scrub rework, I did a stupid thing like this: bio->bi_iter.bi_sector = stripe->logical; btrfs_submit_bio(fs_info, bio, stripe->mirror_num); Above bi_sector assignment is using logical address directly, which lacks ">> SECTOR_SHIFT". This results a read on a range which has no chunk mapping. This results the following crash: BTRFS critical (device dm-1): unable to find logical 11274289152 length 65536 assertion failed: !IS_ERR(em), in fs/btrfs/volumes.c:6387 Sure this is all my fault, but this shows a possible problem in real world, that some bit flip in file extents/tree block can point to unmapped ranges, and trigger above ASSERT(), or if CONFIG_BTRFS_ASSERT is not configured, cause invalid pointer access. [PROBLEMS] In the above call chain, we just don't handle the possible error from btrfs_get_chunk_map() inside __btrfs_map_block(). [FIX] The fix is straightforward, replace the ASSERT() with proper error handling (callers handle errors already). Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7823168c08a6..6d0124b6e79e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6363,7 +6363,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ASSERT(op != BTRFS_MAP_DISCARD); em = btrfs_get_chunk_map(fs_info, logical, *length); - ASSERT(!IS_ERR(em)); + if (IS_ERR(em)) + return PTR_ERR(em); map = em->map_lookup; data_stripes = nr_data_stripes(map); -- cgit From 10a8857a1beaa015efba7d56e06243d484549fb6 Mon Sep 17 00:00:00 2001 From: Sweet Tea Dorminy Date: Wed, 8 Mar 2023 10:58:36 -0500 Subject: btrfs: fix compiler warning on SPARC/PA-RISC handling fscrypt_setup_filename Commit 1ec49744ba83 ("btrfs: turn on -Wmaybe-uninitialized") exposed that on SPARC and PA-RISC, gcc is unaware that fscrypt_setup_filename() only returns negative error values or 0. This ultimately results in a maybe-uninitialized warning in btrfs_lookup_dentry(). Change to only return negative error values or 0 from fscrypt_setup_filename() at the relevant call site, and assert that no positive error codes are returned (which would have wider implications involving other users). Reported-by: Guenter Roeck Link: https://lore.kernel.org/all/481b19b5-83a0-4793-b4fd-194ad7b978c3@roeck-us.net/ Signed-off-by: Sweet Tea Dorminy Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 44e9acc77a74..e99432e4912e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5421,8 +5421,13 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry, return -ENOMEM; ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname); - if (ret) + if (ret < 0) goto out; + /* + * fscrypt_setup_filename() should never return a positive value, but + * gcc on sparc/parisc thinks it can, so assert that doesn't happen. + */ + ASSERT(ret == 0); /* This needs to handle no-key deletions later on */ -- cgit From 9e1cdf0c354e46e428c0e0cab008abbe81b6013d Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 13 Mar 2023 16:29:49 +0900 Subject: btrfs: zoned: fix btrfs_can_activate_zone() to support DUP profile btrfs_can_activate_zone() returns true if at least one device has one zone available for activation. This is OK for the single profile, but not OK for DUP profile. We need two zones to create a DUP block group. Fix it by properly handling the case with the profile flags. Fixes: 265f7237dd25 ("btrfs: zoned: allow DUP on meta-data block groups") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index f95b2c94d619..0a330d5410a0 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2086,11 +2086,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) if (!device->bdev) continue; - if (!zinfo->max_active_zones || - atomic_read(&zinfo->active_zones_left)) { + if (!zinfo->max_active_zones) { ret = true; break; } + + switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: /* single */ + ret = (atomic_read(&zinfo->active_zones_left) >= 1); + break; + case BTRFS_BLOCK_GROUP_DUP: + ret = (atomic_read(&zinfo->active_zones_left) >= 2); + break; + } + if (ret) + break; } mutex_unlock(&fs_info->chunk_mutex); -- cgit From bf1f1fec2724a33b67ec12032402ea75f2a83622 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Mar 2023 16:14:42 -0500 Subject: btrfs: rename BTRFS_FS_NO_OVERCOMMIT to BTRFS_FS_ACTIVE_ZONE_TRACKING This flag only gets set when we're doing active zone tracking, and we're going to need to use this flag for things related to this behavior. Rename the flag to represent what it actually means for the file system so it can be used in other ways and still make sense. Reviewed-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/fs.h | 7 ++----- fs/btrfs/space-info.c | 2 +- fs/btrfs/zoned.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 4c477eae6891..24cd49229408 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -120,11 +120,8 @@ enum { /* Indicate that we want to commit the transaction. */ BTRFS_FS_NEED_TRANS_COMMIT, - /* - * Indicate metadata over-commit is disabled. This is set when active - * zone tracking is needed. - */ - BTRFS_FS_NO_OVERCOMMIT, + /* This is set when active zone tracking is needed. */ + BTRFS_FS_ACTIVE_ZONE_TRACKING, /* * Indicate if we have some features changed, this is mostly for diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 69c09508afb5..2237685d1ed0 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -407,7 +407,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) && + if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) avail = 0; else diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 0a330d5410a0..22d1e930a916 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -524,8 +524,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } atomic_set(&zone_info->active_zones_left, max_active_zones - nactive); - /* Overcommit does not work well with active zone tacking. */ - set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags); + set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); } /* Validate superblock log */ -- cgit From df384da5a49cace5c5e3100803dfd563fd982f93 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Mar 2023 16:14:43 -0500 Subject: btrfs: use temporary variable for space_info in btrfs_update_block_group We do cache->space_info->counter += num_bytes; everywhere in here. This is makes the lines longer than they need to be, and will be especially noticeable when we add the active tracking in, so add a temp variable for the space_info so this is cleaner. Reviewed-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 0ef8b8926bfa..1a31bcd554d0 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3476,6 +3476,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&info->delalloc_root_lock); while (total) { + struct btrfs_space_info *space_info; bool reclaim = false; cache = btrfs_lookup_block_group(info, bytenr); @@ -3483,6 +3484,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, ret = -ENOENT; break; } + space_info = cache->space_info; factor = btrfs_bg_type_to_factor(cache->flags); /* @@ -3497,7 +3499,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, byte_in_group = bytenr - cache->start; WARN_ON(byte_in_group > cache->length); - spin_lock(&cache->space_info->lock); + spin_lock(&space_info->lock); spin_lock(&cache->lock); if (btrfs_test_opt(info, SPACE_CACHE) && @@ -3510,24 +3512,24 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, old_val += num_bytes; cache->used = old_val; cache->reserved -= num_bytes; - cache->space_info->bytes_reserved -= num_bytes; - cache->space_info->bytes_used += num_bytes; - cache->space_info->disk_used += num_bytes * factor; + space_info->bytes_reserved -= num_bytes; + space_info->bytes_used += num_bytes; + space_info->disk_used += num_bytes * factor; spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_unlock(&space_info->lock); } else { old_val -= num_bytes; cache->used = old_val; cache->pinned += num_bytes; - btrfs_space_info_update_bytes_pinned(info, - cache->space_info, num_bytes); - cache->space_info->bytes_used -= num_bytes; - cache->space_info->disk_used -= num_bytes * factor; + btrfs_space_info_update_bytes_pinned(info, space_info, + num_bytes); + space_info->bytes_used -= num_bytes; + space_info->disk_used -= num_bytes * factor; reclaim = should_reclaim_block_group(cache, num_bytes); spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_unlock(&space_info->lock); set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, -- cgit From fa2068d7e922b434eba5bfb0131e6d39febfdb48 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 13 Mar 2023 16:06:13 +0900 Subject: btrfs: zoned: count fresh BG region as zone unusable The naming of space_info->active_total_bytes is misleading. It counts not only active block groups but also full ones which are previously active but now inactive. That confusion results in a bug not counting the full BGs into active_total_bytes on mount time. For a background, there are three kinds of block groups in terms of activation. 1. Block groups never activated 2. Block groups currently active 3. Block groups previously active and currently inactive (due to fully written or zone finish) What we really wanted to exclude from "total_bytes" is the total size of BGs #1. They seem empty and allocatable but since they are not activated, we cannot rely on them to do the space reservation. And, since BGs #1 never get activated, they should have no "used", "reserved" and "pinned" bytes. OTOH, BGs #3 can be counted in the "total", since they are already full we cannot allocate from them anyway. For them, "total_bytes == used + reserved + pinned + zone_unusable" should hold. Tracking #2 and #3 as "active_total_bytes" (current implementation) is confusing. And, tracking #1 and subtract that properly from "total_bytes" every time you need space reservation is cumbersome. Instead, we can count the whole region of a newly allocated block group as zone_unusable. Then, once that block group is activated, release [0 .. zone_capacity] from the zone_unusable counters. With this, we can eliminate the confusing ->active_total_bytes and the code will be common among regular and the zoned mode. Also, no additional counter is needed with this approach. Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 8 +++++++- fs/btrfs/zoned.c | 24 +++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0d250d052487..d84cef89cdff 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2693,8 +2693,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); spin_lock(&ctl->tree_lock); + /* Count initial region as zone_unusable until it gets activated. */ if (!used) to_free = size; + else if (initial && + test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) && + (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))) + to_free = 0; else if (initial) to_free = block_group->zone_capacity; else if (offset >= block_group->alloc_offset) @@ -2722,7 +2727,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, reclaimable_unusable = block_group->zone_unusable - (block_group->length - block_group->zone_capacity); /* All the region is now unusable. Mark it as unused and reclaim */ - if (block_group->zone_unusable == block_group->length) { + if (block_group->zone_unusable == block_group->length && + block_group->alloc_offset) { btrfs_mark_bg_unused(block_group); } else if (bg_reclaim_threshold && reclaimable_unusable >= diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 22d1e930a916..6828712578ca 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1580,9 +1580,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) return; WARN_ON(cache->bytes_super != 0); - unusable = (cache->alloc_offset - cache->used) + - (cache->length - cache->zone_capacity); - free = cache->zone_capacity - cache->alloc_offset; + + /* Check for block groups never get activated */ + if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) && + cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) && + !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) && + cache->alloc_offset == 0) { + unusable = cache->length; + free = 0; + } else { + unusable = (cache->alloc_offset - cache->used) + + (cache->length - cache->zone_capacity); + free = cache->zone_capacity - cache->alloc_offset; + } /* We only need ->free_space in ALLOC_SEQ block groups */ cache->cached = BTRFS_CACHE_FINISHED; @@ -1901,7 +1911,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* Successfully activated all the zones */ set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); - space_info->active_total_bytes += block_group->length; + WARN_ON(block_group->alloc_offset != 0); + if (block_group->zone_unusable == block_group->length) { + block_group->zone_unusable = block_group->length - block_group->zone_capacity; + space_info->bytes_zone_unusable -= block_group->zone_capacity; + } spin_unlock(&block_group->lock); btrfs_try_granting_tickets(fs_info, space_info); spin_unlock(&space_info->lock); @@ -2265,7 +2279,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) u64 avail; spin_lock(&block_group->lock); - if (block_group->reserved || + if (block_group->reserved || block_group->alloc_offset == 0 || (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) { spin_unlock(&block_group->lock); continue; -- cgit From e15acc25880cf048dba9df94d76ed7e7e10040e6 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Mon, 13 Mar 2023 16:06:14 +0900 Subject: btrfs: zoned: drop space_info->active_total_bytes The space_info->active_total_bytes is no longer necessary as we now count the region of newly allocated block group as zone_unusable. Drop its usage. Fixes: 6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 6 ------ fs/btrfs/space-info.c | 40 +++++++++------------------------------- fs/btrfs/space-info.h | 2 -- fs/btrfs/zoned.c | 4 ---- 4 files changed, 9 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1a31bcd554d0..5fc670c27f86 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1175,14 +1175,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, < block_group->zone_unusable); WARN_ON(block_group->space_info->disk_total < block_group->length * factor); - WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, - &block_group->runtime_flags) && - block_group->space_info->active_total_bytes - < block_group->length); } block_group->space_info->total_bytes -= block_group->length; - if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) - block_group->space_info->active_total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); block_group->space_info->bytes_zone_unusable -= diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 2237685d1ed0..3eecce86f63f 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -308,8 +308,6 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, ASSERT(found); spin_lock(&found->lock); found->total_bytes += block_group->length; - if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) - found->active_total_bytes += block_group->length; found->disk_total += block_group->length * factor; found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; @@ -379,22 +377,6 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, return avail; } -static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info) -{ - /* - * On regular filesystem, all total_bytes are always writable. On zoned - * filesystem, there may be a limitation imposed by max_active_zones. - * For metadata allocation, we cannot finish an existing active block - * group to avoid a deadlock. Thus, we need to consider only the active - * groups to be writable for metadata space. - */ - if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) - return space_info->total_bytes; - - return space_info->active_total_bytes; -} - int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 bytes, enum btrfs_reserve_flush_enum flush) @@ -413,7 +395,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, else avail = calc_available_free_space(fs_info, space_info, flush); - if (used + bytes < writable_total_bytes(fs_info, space_info) + avail) + if (used + bytes < space_info->total_bytes + avail) return 1; return 0; } @@ -449,7 +431,7 @@ again: ticket = list_first_entry(head, struct reserve_ticket, list); /* Check and see if our ticket can be satisfied now. */ - if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) || + if ((used + ticket->bytes <= space_info->total_bytes) || btrfs_can_overcommit(fs_info, space_info, ticket->bytes, flush)) { btrfs_space_info_update_bytes_may_use(fs_info, @@ -829,7 +811,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, { u64 used; u64 avail; - u64 total; u64 to_reclaim = space_info->reclaim_size; lockdep_assert_held(&space_info->lock); @@ -844,9 +825,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, * space. If that's the case add in our overage so we make sure to put * appropriate pressure on the flushing state machine. */ - total = writable_total_bytes(fs_info, space_info); - if (total + avail < used) - to_reclaim += used - (total + avail); + if (space_info->total_bytes + avail < used) + to_reclaim += used - (space_info->total_bytes + avail); return to_reclaim; } @@ -856,11 +836,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, { u64 global_rsv_size = fs_info->global_block_rsv.reserved; u64 ordered, delalloc; - u64 total = writable_total_bytes(fs_info, space_info); u64 thresh; u64 used; - thresh = mult_perc(total, 90); + thresh = mult_perc(space_info->total_bytes, 90); lockdep_assert_held(&space_info->lock); @@ -923,8 +902,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, BTRFS_RESERVE_FLUSH_ALL); used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_readonly + global_rsv_size; - if (used < total) - thresh += total - used; + if (used < space_info->total_bytes) + thresh += space_info->total_bytes - used; thresh >>= space_info->clamp; used = space_info->bytes_pinned; @@ -1651,7 +1630,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * can_overcommit() to ensure we can overcommit to continue. */ if (!pending_tickets && - ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) || + ((used + orig_bytes <= space_info->total_bytes) || btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); @@ -1665,8 +1644,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, */ if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) { used = btrfs_space_info_used(space_info, false); - if (used + orig_bytes <= - writable_total_bytes(fs_info, space_info)) { + if (used + orig_bytes <= space_info->total_bytes) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); ret = 0; diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index fc99ea2b0c34..2033b71b18ce 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -96,8 +96,6 @@ struct btrfs_space_info { u64 bytes_may_use; /* number of bytes that may be used for delalloc/allocations */ u64 bytes_readonly; /* total bytes that are read only */ - /* Total bytes in the space, but only accounts active block groups. */ - u64 active_total_bytes; u64 bytes_zone_unusable; /* total bytes that are unusable until resetting the device zone */ diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 6828712578ca..45d04092f2f8 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2316,10 +2316,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) return 0; - /* No more block groups to activate */ - if (space_info->active_total_bytes == space_info->total_bytes) - return 0; - for (;;) { int ret; bool need_finish = false; -- cgit From eb81a2ed4f52be831c9fb879752d89645a312c13 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 14 Mar 2023 04:47:35 +0000 Subject: perf/core: Fix perf_output_begin parameter is incorrectly invoked in perf_event_bpf_output syzkaller reportes a KASAN issue with stack-out-of-bounds. The call trace is as follows: dump_stack+0x9c/0xd3 print_address_description.constprop.0+0x19/0x170 __kasan_report.cold+0x6c/0x84 kasan_report+0x3a/0x50 __perf_event_header__init_id+0x34/0x290 perf_event_header__init_id+0x48/0x60 perf_output_begin+0x4a4/0x560 perf_event_bpf_output+0x161/0x1e0 perf_iterate_sb_cpu+0x29e/0x340 perf_iterate_sb+0x4c/0xc0 perf_event_bpf_event+0x194/0x2c0 __bpf_prog_put.constprop.0+0x55/0xf0 __cls_bpf_delete_prog+0xea/0x120 [cls_bpf] cls_bpf_delete_prog_work+0x1c/0x30 [cls_bpf] process_one_work+0x3c2/0x730 worker_thread+0x93/0x650 kthread+0x1b8/0x210 ret_from_fork+0x1f/0x30 commit 267fb27352b6 ("perf: Reduce stack usage of perf_output_begin()") use on-stack struct perf_sample_data of the caller function. However, perf_event_bpf_output uses incorrect parameter to convert small-sized data (struct perf_bpf_event) into large-sized data (struct perf_sample_data), which causes memory overwriting occurs in __perf_event_header__init_id. Fixes: 267fb27352b6 ("perf: Reduce stack usage of perf_output_begin()") Signed-off-by: Yang Jihong Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230314044735.56551-1-yangjihong1@huawei.com --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f79fd8b87f75..296617edbda1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9187,7 +9187,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data) perf_event_header__init_id(&bpf_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, data, event, + ret = perf_output_begin(&handle, &sample, event, bpf_event->event_id.header.size); if (ret) return; -- cgit From baf1b12a67f5b24f395baca03e442ce27cab0c18 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 13 Mar 2023 10:16:08 -0700 Subject: perf: fix perf_event_context->time Time readers rely on perf_event_context->[time|timestamp|timeoffset] to get accurate time_enabled and time_running for an event. The difference between ctx->timestamp and ctx->time is the among of time when the context is not enabled. __update_context_time(ctx, false) is used to increase timestamp, but not time. Therefore, it should only be called in ctx_sched_in() when EVENT_TIME was not enabled. Fixes: 09f5e7dc7ad7 ("perf: Fix perf_event_read_local() time") Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Acked-by: Namhyung Kim Link: https://lkml.kernel.org/r/20230313171608.298734-1-song@kernel.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 296617edbda1..52b4aa0b3bd1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3872,7 +3872,7 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) if (likely(!ctx->nr_events)) return; - if (is_active ^ EVENT_TIME) { + if (!(is_active & EVENT_TIME)) { /* start ctx time */ __update_context_time(ctx, false); perf_cgroup_set_timestamp(cpuctx); -- cgit From fd0815f632c24878e325821943edccc7fde947a2 Mon Sep 17 00:00:00 2001 From: Budimir Markovic Date: Wed, 15 Mar 2023 00:29:01 -0700 Subject: perf: Fix check before add_event_to_groups() in perf_group_detach() Events should only be added to a groups rb tree if they have not been removed from their context by list_del_event(). Since remove_on_exec made it possible to call list_del_event() on individual events before they are detached from their group, perf_group_detach() should check each sibling's attach_state before calling add_event_to_groups() on it. Fixes: 2e498d0a74e5 ("perf: Add support for event removal on exec") Signed-off-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/ZBFzvQV9tEqoHEtH@gentoo --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 52b4aa0b3bd1..fb3e436bcd4a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2163,7 +2163,7 @@ static void perf_group_detach(struct perf_event *event) /* Inherit group flags from the previous leader */ sibling->group_caps = event->group_caps; - if (!RB_EMPTY_NODE(&event->group_node)) { + if (sibling->attach_state & PERF_ATTACH_CONTEXT) { add_event_to_groups(sibling, event->ctx); if (sibling->state == PERF_EVENT_STATE_ACTIVE) -- cgit From 709671ffb15dcd1b4f6afe2a9d8c67c7c4ead4a1 Mon Sep 17 00:00:00 2001 From: Saaem Rizvi Date: Mon, 27 Feb 2023 18:55:07 -0500 Subject: drm/amd/display: Remove OTG DIV register write for Virtual signals. [WHY] Hot plugging and then hot unplugging leads to k1 and k2 values to change, as signal is detected as a virtual signal on hot unplug. Writing these values to OTG_PIXEL_RATE_DIV register might cause primary display to blank (known hw bug). [HOW] No longer write k1 and k2 values to register if signal is virtual, we have safe guards in place in the case that k1 and k2 is unassigned so that an unknown value is not written to the register either. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Samson Tam Reviewed-by: Alvin Lee Acked-by: Qingqing Zhuo Signed-off-by: Saaem Rizvi Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 16f892125b6f..9d14045cccd6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1104,7 +1104,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) { + } else if (dc_is_dp_signal(stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; -- cgit From 56574f89dbd84004c3fd6485bcaafb5aa9b8be14 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Thu, 3 Nov 2022 22:29:31 -0400 Subject: drm/amd/display: Do not set DRR on pipe Commit [WHY] Writing to DRR registers such as OTG_V_TOTAL_MIN on the same frame as a pipe commit can cause underflow. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Jun Lei Acked-by: Qingqing Zhuo Signed-off-by: Wesley Chalmers Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 3b4d4d68359b..df787fcf8e86 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -998,8 +998,5 @@ void dcn30_prepare_bandwidth(struct dc *dc, dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); dcn20_prepare_bandwidth(dc, context); - - dc_dmub_srv_p_state_delegate(dc, - context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching, context); } -- cgit From cbd6c1b17d3b42b7935526a86ad5f66838767d03 Mon Sep 17 00:00:00 2001 From: Cruise Hung Date: Thu, 2 Mar 2023 10:33:51 +0800 Subject: drm/amd/display: Fix DP MST sinks removal issue [Why] In USB4 DP tunneling, it's possible to have this scenario that the path becomes unavailable and CM tears down the path a little bit late. So, in this case, the HPD is high but fails to read any DPCD register. That causes the link connection type to be set to sst. And not all sinks are removed behind the MST branch. [How] Restore the link connection type if it fails to read DPCD register. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Wenjing Liu Acked-by: Qingqing Zhuo Signed-off-by: Cruise Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 38216c789d77..f70025ef7b69 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -855,6 +855,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, struct dc_sink *prev_sink = NULL; struct dpcd_caps prev_dpcd_caps; enum dc_connection_type new_connection_type = dc_connection_none; + enum dc_connection_type pre_connection_type = link->type; const uint32_t post_oui_delay = 30; // 30ms DC_LOGGER_INIT(link->ctx->logger); @@ -957,6 +958,8 @@ static bool detect_link_and_local_sink(struct dc_link *link, } if (!detect_dp(link, &sink_caps, reason)) { + link->type = pre_connection_type; + if (prev_sink) dc_sink_release(prev_sink); return false; @@ -1244,11 +1247,16 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason) bool is_delegated_to_mst_top_mgr = false; enum dc_connection_type pre_link_type = link->type; + DC_LOGGER_INIT(link->ctx->logger); + is_local_sink_detect_success = detect_link_and_local_sink(link, reason); if (is_local_sink_detect_success && link->local_sink) verify_link_capability(link, link->local_sink, reason); + DC_LOG_DC("%s: link_index=%d is_local_sink_detect_success=%d pre_link_type=%d link_type=%d\n", __func__, + link->link_index, is_local_sink_detect_success, pre_link_type, link->type); + if (is_local_sink_detect_success && link->local_sink && dc_is_dp_signal(link->local_sink->sink_signal) && link->dpcd_caps.is_mst_capable) -- cgit From 7304ee979b6b6422f41a1312391a5e505fc29ccd Mon Sep 17 00:00:00 2001 From: Ayush Gupta Date: Thu, 2 Mar 2023 09:58:05 -0500 Subject: drm/amd/display: disconnect MPCC only on OTG change [Why] Framedrops are observed while playing Vp9 and Av1 10 bit video on 8k resolution using VSR while playback controls are disappeared/appeared [How] Now ODM 2 to 1 is disabled for 5k or greater resolutions on VSR. Cc: stable@vger.kernel.org Cc: Mario Limonciello Reviewed-by: Alvin Lee Acked-by: Qingqing Zhuo Signed-off-by: Ayush Gupta Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 74e50c09bb62..d024007f0f65 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1915,6 +1915,7 @@ int dcn32_populate_dml_pipes_from_context( bool subvp_in_use = false; uint8_t is_pipe_split_expected[MAX_PIPES] = {0}; struct dc_crtc_timing *timing; + bool vsr_odm_support = false; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -1932,12 +1933,15 @@ int dcn32_populate_dml_pipes_from_context( timing = &pipe->stream->timing; pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + vsr_odm_support = (res_ctx->pipe_ctx[i].stream->src.width >= 5120 && + res_ctx->pipe_ctx[i].stream->src.width > res_ctx->pipe_ctx[i].stream->dst.width); if (context->stream_count == 1 && context->stream_status[0].plane_count == 1 && !dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) && is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) && pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ && - dc->debug.enable_single_display_2to1_odm_policy) { + dc->debug.enable_single_display_2to1_odm_policy && + !vsr_odm_support) { //excluding 2to1 ODM combine on >= 5k vsr pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } pipe_cnt++; -- cgit From 0424a7dfe9129b93f29b277511a60e87f052ac6b Mon Sep 17 00:00:00 2001 From: Shawn Wang Date: Tue, 17 Jan 2023 13:14:50 -0800 Subject: x86/resctrl: Clear staged_config[] before and after it is used As a temporary storage, staged_config[] in rdt_domain should be cleared before and after it is used. The stale value in staged_config[] could cause an MSR access error. Here is a reproducer on a system with 16 usable CLOSIDs for a 15-way L3 Cache (MBA should be disabled if the number of CLOSIDs for MB is less than 16.) : mount -t resctrl resctrl -o cdp /sys/fs/resctrl mkdir /sys/fs/resctrl/p{1..7} umount /sys/fs/resctrl/ mount -t resctrl resctrl /sys/fs/resctrl mkdir /sys/fs/resctrl/p{1..8} An error occurs when creating resource group named p8: unchecked MSR access error: WRMSR to 0xca0 (tried to write 0x00000000000007ff) at rIP: 0xffffffff82249142 (cat_wrmsr+0x32/0x60) Call Trace: __flush_smp_call_function_queue+0x11d/0x170 __sysvec_call_function+0x24/0xd0 sysvec_call_function+0x89/0xc0 asm_sysvec_call_function+0x16/0x20 When creating a new resource control group, hardware will be configured by the following process: rdtgroup_mkdir() rdtgroup_mkdir_ctrl_mon() rdtgroup_init_alloc() resctrl_arch_update_domains() resctrl_arch_update_domains() iterates and updates all resctrl_conf_type whose have_new_ctrl is true. Since staged_config[] holds the same values as when CDP was enabled, it will continue to update the CDP_CODE and CDP_DATA configurations. When group p8 is created, get_config_index() called in resctrl_arch_update_domains() will return 16 and 17 as the CLOSIDs for CDP_CODE and CDP_DATA, which will be translated to an invalid register - 0xca0 in this scenario. Fix it by clearing staged_config[] before and after it is used. [reinette: re-order commit tags] Fixes: 75408e43509e ("x86/resctrl: Allow different CODE/DATA configurations to be staged") Suggested-by: Xin Hao Signed-off-by: Shawn Wang Signed-off-by: Reinette Chatre Signed-off-by: Dave Hansen Tested-by: Reinette Chatre Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/2fad13f49fbe89687fc40e9a5a61f23a28d1507a.1673988935.git.reinette.chatre%40intel.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 7 ++----- arch/x86/kernel/cpu/resctrl/internal.h | 1 + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 25 +++++++++++++++++++++---- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index eb07d4435391..b44c487727d4 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -368,7 +368,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, { struct resctrl_schema *s; struct rdtgroup *rdtgrp; - struct rdt_domain *dom; struct rdt_resource *r; char *tok, *resname; int ret = 0; @@ -397,10 +396,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, goto out; } - list_for_each_entry(s, &resctrl_schema_all, list) { - list_for_each_entry(dom, &s->res->domains, list) - memset(dom->staged_config, 0, sizeof(dom->staged_config)); - } + rdt_staged_configs_clear(); while ((tok = strsep(&buf, "\n")) != NULL) { resname = strim(strsep(&tok, ":")); @@ -445,6 +441,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, } out: + rdt_staged_configs_clear(); rdtgroup_kn_unlock(of->kn); cpus_read_unlock(); return ret ?: nbytes; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 8edecc5763d8..85ceaf9a31ac 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -555,5 +555,6 @@ void __check_limbo(struct rdt_domain *d, bool force_free); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); void __init thread_throttle_mode_init(void); void __init mbm_config_rftype_init(const char *config); +void rdt_staged_configs_clear(void); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 884b6e9a7e31..6ad33f355861 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -78,6 +78,19 @@ void rdt_last_cmd_printf(const char *fmt, ...) va_end(ap); } +void rdt_staged_configs_clear(void) +{ + struct rdt_resource *r; + struct rdt_domain *dom; + + lockdep_assert_held(&rdtgroup_mutex); + + for_each_alloc_capable_rdt_resource(r) { + list_for_each_entry(dom, &r->domains, list) + memset(dom->staged_config, 0, sizeof(dom->staged_config)); + } +} + /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. @@ -3107,7 +3120,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) { struct resctrl_schema *s; struct rdt_resource *r; - int ret; + int ret = 0; + + rdt_staged_configs_clear(); list_for_each_entry(s, &resctrl_schema_all, list) { r = s->res; @@ -3119,20 +3134,22 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) } else { ret = rdtgroup_init_cat(s, rdtgrp->closid); if (ret < 0) - return ret; + goto out; } ret = resctrl_arch_update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("Failed to initialize allocations\n"); - return ret; + goto out; } } rdtgrp->mode = RDT_MODE_SHAREABLE; - return 0; +out: + rdt_staged_configs_clear(); + return ret; } static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, -- cgit From 20bc9f76b6a2455c6b54b91ae7634f147f64987f Mon Sep 17 00:00:00 2001 From: David Belanger Date: Tue, 28 Feb 2023 14:11:24 -0500 Subject: drm/amdkfd: Fixed kfd_process cleanup on module exit. Handle case when module is unloaded (kfd_exit) before a process space (mm_struct) is released. v2: Fixed potential race conditions by removing all kfd_process from the process table first, then working on releasing the resources. v3: Fixed loop element access / synchronization. Fixed extra empty lines. Signed-off-by: David Belanger Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 67 ++++++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 09b966dc3768..aee2212e52f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -77,6 +77,7 @@ err_ioctl: static void kfd_exit(void) { + kfd_cleanup_processes(); kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_procfs_shutdown(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bfa30d12406b..7e4d992e48b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -928,6 +928,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev); int kfd_process_create_wq(void); void kfd_process_destroy_wq(void); +void kfd_cleanup_processes(void); struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *task); struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7acd55a814b2..4208e0f01064 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } +static void kfd_process_notifier_release_internal(struct kfd_process *p) +{ + cancel_delayed_work_sync(&p->eviction_work); + cancel_delayed_work_sync(&p->restore_work); + + /* Indicate to other users that MM is no longer valid */ + p->mm = NULL; + + mmu_notifier_put(&p->mmu_notifier); +} + static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { @@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, return; mutex_lock(&kfd_processes_mutex); + /* + * Do early return if table is empty. + * + * This could potentially happen if this function is called concurrently + * by mmu_notifier and by kfd_cleanup_pocesses. + * + */ + if (hash_empty(kfd_processes_table)) { + mutex_unlock(&kfd_processes_mutex); + return; + } hash_del_rcu(&p->kfd_processes); mutex_unlock(&kfd_processes_mutex); synchronize_srcu(&kfd_processes_srcu); - cancel_delayed_work_sync(&p->eviction_work); - cancel_delayed_work_sync(&p->restore_work); - - /* Indicate to other users that MM is no longer valid */ - p->mm = NULL; - - mmu_notifier_put(&p->mmu_notifier); + kfd_process_notifier_release_internal(p); } static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { @@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .free_notifier = kfd_process_free_notifier, }; +/* + * This code handles the case when driver is being unloaded before all + * mm_struct are released. We need to safely free the kfd_process and + * avoid race conditions with mmu_notifier that might try to free them. + * + */ +void kfd_cleanup_processes(void) +{ + struct kfd_process *p; + struct hlist_node *p_temp; + unsigned int temp; + HLIST_HEAD(cleanup_list); + + /* + * Move all remaining kfd_process from the process table to a + * temp list for processing. Once done, callback from mmu_notifier + * release will not see the kfd_process in the table and do early return, + * avoiding double free issues. + */ + mutex_lock(&kfd_processes_mutex); + hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { + hash_del_rcu(&p->kfd_processes); + synchronize_srcu(&kfd_processes_srcu); + hlist_add_head(&p->kfd_processes, &cleanup_list); + } + mutex_unlock(&kfd_processes_mutex); + + hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes) + kfd_process_notifier_release_internal(p); + + /* + * Ensures that all outstanding free_notifier get called, triggering + * the release of the kfd_process struct. + */ + mmu_notifier_synchronize(); +} + static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; -- cgit From f3921a9a641483784448fb982b2eb738b383d9b9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 13 Mar 2023 20:03:08 -0400 Subject: drm/amdgpu: Don't resume IOMMU after incomplete init Check kfd->init_complete in kgd2kfd_iommu_resume, consistent with other kgd2kfd calls. This should fix IOMMU errors on resume from suspend when KFD IOMMU initialization failed. Reported-by: Matt Fagnani Link: https://lore.kernel.org/r/4a3b225c-2ffd-e758-4de1-447375e34cad@bell.net/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=217170 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2454 Cc: Vasant Hegde Cc: Linux regression tracking (Thorsten Leemhuis) Cc: stable@vger.kernel.org Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Tested-by: Matt Fagnani Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3de7f616a001..ec70a1658dc3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -59,6 +59,7 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); static void kfd_gtt_sa_fini(struct kfd_dev *kfd); +static int kfd_resume_iommu(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) @@ -624,7 +625,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, svm_migrate_init(kfd->adev); - if (kgd2kfd_resume_iommu(kfd)) + if (kfd_resume_iommu(kfd)) goto device_iommu_error; if (kfd_resume(kfd)) @@ -772,6 +773,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) } int kgd2kfd_resume_iommu(struct kfd_dev *kfd) +{ + if (!kfd->init_complete) + return 0; + + return kfd_resume_iommu(kfd); +} + +static int kfd_resume_iommu(struct kfd_dev *kfd) { int err = 0; -- cgit From 37beabe9a891b92174cd1aafbfa881fe9e05aa87 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 8 Feb 2023 14:25:54 +0200 Subject: net/mlx5e: Fix macsec ASO context alignment Currently mlx5e_macsec_umr struct does not satisfy hardware memory alignment requirement. Hence the result of querying advanced steering operation (ASO) is not copied to the memory region as expected. Fix by satisfying hardware memory alignment requirement and move context to be first field in struct for better readability. Fixes: 1f53da676439 ("net/mlx5e: Create advanced steering operation (ASO) object for MACsec") Signed-off-by: Emeel Hakim Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 08d0929e8260..8af53178e40d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -89,8 +89,8 @@ struct mlx5e_macsec_rx_sc { }; struct mlx5e_macsec_umr { + u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(macsec_aso)]; dma_addr_t dma_addr; - u8 ctx[MLX5_ST_SZ_BYTES(macsec_aso)]; u32 mkey; }; -- cgit From 9a92fe1db9e57ea94388a1d768e8ee42af858377 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 12 Mar 2021 07:21:29 -0600 Subject: net/mlx5e: Don't cache tunnel offloads capability When mlx5e attaches again after device health recovery, the device capabilities might have changed by the eswitch manager. For example in one flow when ECPF changes the eswitch mode between legacy and switchdev, it updates the flow table tunnel capability. The cached value is only used in one place, so just check the capability there instead. Fixes: 5bef709d76a2 ("net/mlx5: Enable host PF HCA after eswitch is initialized") Signed-off-by: Parav Pandit Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 1 - 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4276c6eb6820..4a19ef4a9811 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -313,7 +313,6 @@ struct mlx5e_params { } channel; } mqprio; bool rx_cqe_compress_def; - bool tunneled_offload_en; struct dim_cq_moder rx_cq_moderation; struct dim_cq_moder tx_cq_moderation; struct mlx5e_packet_merge_param packet_merge; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 51b5f3cca504..56fc2aebb9ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4979,8 +4979,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* TX inline */ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev); - /* AF_XDP */ params->xsk = xsk; @@ -5285,7 +5283,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) } features = MLX5E_RX_RES_FEATURE_PTP; - if (priv->channels.params.tunneled_offload_en) + if (mlx5_tunnel_inner_ft_supported(mdev)) features |= MLX5E_RX_RES_FEATURE_INNER_FT; err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, priv->max_nch, priv->drop_rq.rqn, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 43fd12fb87b8..8ff654b4e9e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -755,7 +755,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev) mlx5e_set_rx_cq_mode_params(params, cq_period_mode); params->mqprio.num_tc = 1; - params->tunneled_offload_en = false; if (rep->vport != MLX5_VPORT_UPLINK) params->vlan_strip_disable = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index c2a4f86bc890..baa7ef812313 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -70,7 +70,6 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; - params->tunneled_offload_en = false; /* CQE compression is not supported for IPoIB */ params->rx_cqe_compress_def = false; -- cgit From ba5d8f72b82cc197355c9340ef89dab813815865 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 24 Jun 2021 18:22:57 +0300 Subject: net/mlx5: Fix setting ec_function bit in MANAGE_PAGES When ECPF is a page supplier, reclaim pages missed to honor the ec_function bit provided by the firmware. It always used the ec_function to true during driver unload flow for ECPF. This is incorrect. Honor the ec_function bit provided by device during page allocation request event. Fixes: d6945242f45d ("net/mlx5: Hold pages RB tree per VF") Signed-off-by: Parav Pandit Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 64d4e7125e9b..95dc67fb3001 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -82,6 +82,16 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; } +static u32 mlx5_get_ec_function(u32 function) +{ + return function >> 16; +} + +static u32 mlx5_get_func_id(u32 function) +{ + return function & 0xffff; +} + static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) { struct rb_root *root; @@ -665,20 +675,22 @@ static int optimal_reclaimed_pages(void) } static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev, - struct rb_root *root, u16 func_id) + struct rb_root *root, u32 function) { u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES)); unsigned long end = jiffies + recl_pages_to_jiffies; while (!RB_EMPTY_ROOT(root)) { + u32 ec_function = mlx5_get_ec_function(function); + u32 function_id = mlx5_get_func_id(function); int nclaimed; int err; - err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(), - &nclaimed, false, mlx5_core_is_ecpf(dev)); + err = reclaim_pages(dev, function_id, optimal_reclaimed_pages(), + &nclaimed, false, ec_function); if (err) { - mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n", - err, func_id); + mlx5_core_warn(dev, "reclaim_pages err (%d) func_id=0x%x ec_func=0x%x\n", + err, function_id, ec_function); return err; } -- cgit From 7ba930fc25def6fd736abcdfa224272948a65cf7 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 20 Oct 2022 00:13:50 +0300 Subject: net/mlx5: Disable eswitch before waiting for VF pages The offending commit changed the ordering of moving to legacy mode and waiting for the VF pages. Moving to legacy mode is important in bluefield, because it sends the host driver into error state, and frees its pages. Without this transition we end up waiting 2 minutes for pages that aren't coming before carrying on with the unload process. Fixes: f019679ea5f2 ("net/mlx5: E-switch, Remove dependency between sriov and eswitch mode") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 540840e80493..f36a3aa4b5c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1364,8 +1364,8 @@ static void mlx5_unload(struct mlx5_core_dev *dev) { mlx5_devlink_traps_unregister(priv_to_devlink(dev)); mlx5_sf_dev_table_destroy(dev); - mlx5_sriov_detach(dev); mlx5_eswitch_disable(dev->priv.eswitch); + mlx5_sriov_detach(dev); mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); mlx5_sf_hw_table_destroy(dev); -- cgit From 1313d78ac0c1cfcff7bdece8da54b080e71487c4 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 7 Feb 2023 15:07:00 +0200 Subject: net/mlx5: E-switch, Fix wrong usage of source port rewrite in split rules In few cases, rules with mirror use case are split to two FTEs, one which do the mirror action and forward to second FTE which do the rest of the rule actions and the second redirect action. In case of mirror rules which do split and forward to ovs internal port or VF stack devices, source port rewrite should be used in the second FTE but it is wrongly also set in the first FTE which break the offload. Fix this issue by removing the wrong check if source port rewrite is needed to be used on the first FTE of the split and instead return EOPNOTSUPP which will block offload of rules which mirror to ovs internal port or VF stack devices which isn't supported. Fixes: 10742efc20a4 ("net/mlx5e: VF tunnel TX traffic offloading") Fixes: a508728a4c8b ("net/mlx5e: VF tunnel RX traffic offloading") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d766a64b1823..22075943bb58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -723,11 +723,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < esw_attr->split_count; i++) { - if (esw_is_indir_table(esw, attr)) - err = esw_setup_indir_table(dest, &flow_act, esw, attr, false, &i); - else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) - err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr, - &i); + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + /* Source port rewrite (forward to ovs internal port or statck device) isn't + * supported in the rule of split action. + */ + err = -EOPNOTSUPP; else esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false); -- cgit From 28d3815a629cbdee660dd1c9de28d77cb3d77917 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 8 Feb 2023 11:37:41 +0200 Subject: net/mlx5: E-switch, Fix missing set of split_count when forward to ovs internal port Rules with mirror actions are split to two FTEs when the actions after the mirror action contains pedit, vlan push/pop or ct. Forward to ovs internal port adds implicit header rewrite (pedit) but missing trigger to do split. Fix by setting split_count when forwarding to ovs internal port which will trigger split in mirror rules. Fixes: 27484f7170ed ("net/mlx5e: Offload tc rules that redirect to ovs internal port") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 70b8d2dfa751..90944bf271ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4304,6 +4304,7 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, esw_attr->dest_int_port = dest_int_port; esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + esw_attr->split_count = out_index; /* Forward to root fdb for matching against the new source vport */ attr->dest_chain = 0; -- cgit From c9668f0b1d28570327dbba189f2c61f6f9e43ae7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 12 Feb 2023 11:01:43 +0200 Subject: net/mlx5e: Fix cleanup null-ptr deref on encap lock During module is unloaded while a peer tc flow is still offloaded, first the peer uplink rep profile is changed to a nic profile, and so neigh encap lock is destroyed. Next during unload, the VF reps netdevs are unregistered which causes the original non-peer tc flow to be deleted, which deletes the peer flow. The peer flow deletion detaches the encap entry and try to take the already destroyed encap lock, causing the below trace. Fix this by clearing peer flows during tc eswitch cleanup (mlx5e_tc_esw_cleanup()). Relevant trace: [ 4316.837128] BUG: kernel NULL pointer dereference, address: 00000000000001d8 [ 4316.842239] RIP: 0010:__mutex_lock+0xb5/0xc40 [ 4316.851897] Call Trace: [ 4316.852481] [ 4316.857214] mlx5e_rep_neigh_entry_release+0x93/0x790 [mlx5_core] [ 4316.858258] mlx5e_rep_encap_entry_detach+0xa7/0xf0 [mlx5_core] [ 4316.859134] mlx5e_encap_dealloc+0xa3/0xf0 [mlx5_core] [ 4316.859867] clean_encap_dests.part.0+0x5c/0xe0 [mlx5_core] [ 4316.860605] mlx5e_tc_del_fdb_flow+0x32a/0x810 [mlx5_core] [ 4316.862609] __mlx5e_tc_del_fdb_peer_flow+0x1a2/0x250 [mlx5_core] [ 4316.863394] mlx5e_tc_del_flow+0x(/0x630 [mlx5_core] [ 4316.864090] mlx5e_flow_put+0x5f/0x100 [mlx5_core] [ 4316.864771] mlx5e_delete_flower+0x4de/0xa40 [mlx5_core] [ 4316.865486] tc_setup_cb_reoffload+0x20/0x80 [ 4316.865905] fl_reoffload+0x47c/0x510 [cls_flower] [ 4316.869181] tcf_block_playback_offloads+0x91/0x1d0 [ 4316.869649] tcf_block_unbind+0xe7/0x1b0 [ 4316.870049] tcf_block_offload_cmd.isra.0+0x1ee/0x270 [ 4316.879266] tcf_block_offload_unbind+0x61/0xa0 [ 4316.879711] __tcf_block_put+0xa4/0x310 Fixes: 04de7dda7394 ("net/mlx5e: Infrastructure for duplicated offloading of TC flows") Fixes: 1418ddd96afd ("net/mlx5e: Duplicate offloaded TC eswitch rules under uplink LAG") Signed-off-by: Paul Blakey Reviewed-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 90944bf271ce..cc35cbc9934d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5464,6 +5464,16 @@ err_tun_mapping: void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) { + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + + rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + + mlx5e_tc_clean_fdb_peer_flows(esw); + mlx5e_tc_tun_cleanup(uplink_priv->encap); mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); -- cgit From dd64572490c3d7aab04083db8791fab157a941ed Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 24 Jan 2023 17:34:32 +0200 Subject: net/mlx5e: kTLS, Fix missing error unwind on unsupported cipher type Do proper error unwinding when adding an unsupported TX/RX cipher type. Move the switch case prior to key creation so there's less to unwind, and change the goto label name to describe the action performed instead of what failed. Fixes: 4960c414db35 ("net/mlx5e: Support 256 bit keys with kTLS device offload") Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 24 ++++++++++++---------- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 22 +++++++++++--------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 4be770443b0c..9b597cb24598 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -621,15 +621,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, if (unlikely(!priv_rx)) return -ENOMEM; - dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); - if (IS_ERR(dek)) { - err = PTR_ERR(dek); - goto err_create_key; - } - priv_rx->dek = dek; - - INIT_LIST_HEAD(&priv_rx->list); - spin_lock_init(&priv_rx->lock); switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: priv_rx->crypto_info.crypto_info_128 = @@ -642,9 +633,20 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, default: WARN_ONCE(1, "Unsupported cipher type %u\n", crypto_info->cipher_type); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_cipher_type; } + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); + goto err_cipher_type; + } + priv_rx->dek = dek; + + INIT_LIST_HEAD(&priv_rx->list); + spin_lock_init(&priv_rx->lock); + rxq = mlx5e_ktls_sk_get_rxq(sk); priv_rx->rxq = rxq; priv_rx->sk = sk; @@ -677,7 +679,7 @@ err_post_wqes: mlx5e_tir_destroy(&priv_rx->tir); err_create_tir: mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_rx->dek); -err_create_key: +err_cipher_type: kfree(priv_rx); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 60b3e08a1028..0e4c0a093293 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -469,14 +469,6 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, if (IS_ERR(priv_tx)) return PTR_ERR(priv_tx); - dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); - if (IS_ERR(dek)) { - err = PTR_ERR(dek); - goto err_create_key; - } - priv_tx->dek = dek; - - priv_tx->expected_seq = start_offload_tcp_sn; switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: priv_tx->crypto_info.crypto_info_128 = @@ -489,8 +481,18 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, default: WARN_ONCE(1, "Unsupported cipher type %u\n", crypto_info->cipher_type); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_pool_push; } + + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); + goto err_pool_push; + } + + priv_tx->dek = dek; + priv_tx->expected_seq = start_offload_tcp_sn; priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx); mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx); @@ -500,7 +502,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, return 0; -err_create_key: +err_pool_push: pool_push(pool, priv_tx); return err; } -- cgit From 031a163f2c476adcb2c01e27a7d323e66174ac11 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 28 Feb 2023 10:36:19 +0200 Subject: net/mlx5: Set BREAK_FW_WAIT flag first when removing driver Currently, BREAK_FW_WAIT flag is set after syncing with fw_reset. However, fw_reset can call mlx5_load_one() which is waiting for fw init bit and BREAK_FW_WAIT flag is intended to stop. e.g.: the driver might wait on a loop it should exit. Fix it by setting the flag before syncing with fw_reset. Fixes: 8324a02c342a ("net/mlx5: Add exit route when waiting for FW") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f36a3aa4b5c8..f1de152a6113 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1789,11 +1789,11 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain * fw_reset before unregistering the devlink. */ mlx5_drain_fw_reset(dev); - set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); devlink_unregister(devlink); mlx5_sriov_disable(pdev); mlx5_crdump_disable(dev); -- cgit From 78dee7befd56987283c13877b834c0aa97ad51b9 Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Mon, 23 Jan 2023 10:09:01 +0200 Subject: net/mlx5e: Lower maximum allowed MTU in XSK to match XDP prerequisites XSK redirecting XDP programs require linearity, hence applies restrictions on the MTU. For PAGE_SIZE=4K, MTU shouldn't exceed 3498. Features that contradict with XDP such HW-LRO and HW-GRO are enforced by the driver in advance, during XSK params validation, except for MTU, which was not enforced before this patch. This has been spotted during test scenario described below: Attaching xdpsock program (PAGE_SIZE=4K), with MTU < 3498, detaching XDP program, changing the MTU to arbitrary value in the range [3499, 3754], attaching XDP program again, which ended up with failure since MTU is > 3498. This commit lowers the XSK MTU limitation to be aligned with XDP MTU limitation, since XSK socket is meaningless without XDP program. Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 56fc2aebb9ee..a7f2ab22cc40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4169,13 +4169,17 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, struct xsk_buff_pool *xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix); struct mlx5e_xsk_param xsk; + int max_xdp_mtu; if (!xsk_pool) continue; mlx5e_build_xsk_param(xsk_pool, &xsk); + max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk); - if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { + /* Validate XSK params and XDP MTU in advance */ + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) || + new_params->sw_mtu > max_xdp_mtu) { u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); int max_mtu_frame, max_mtu_page, max_mtu; @@ -4185,9 +4189,9 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, */ max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0)); - max_mtu = min(max_mtu_frame, max_mtu_page); + max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu); - netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n", + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n", new_params->sw_mtu, ix, max_mtu); return false; } -- cgit From d1a0075ad6b693c2bd41e7aedb8a4f3b74b6999c Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Thu, 16 Feb 2023 12:34:21 +0000 Subject: net/sched: TC, fix raw counter initialization Freed counters may be reused by fs core. As such, raw counters may not be initialized to zero. Cache the counter values when the action stats object is initialized to have a proper base value for calculating the difference from the previous query. Fixes: 2b68d659a704 ("net/mlx5e: TC, support per action stats") Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c index 626cb7470fa5..07c1895a2b23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c @@ -64,6 +64,7 @@ mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle, { struct mlx5e_tc_act_stats *act_stats, *old_act_stats; struct rhashtable *ht = &handle->ht; + u64 lastused; int err = 0; act_stats = kvzalloc(sizeof(*act_stats), GFP_KERNEL); @@ -73,6 +74,10 @@ mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle, act_stats->tc_act_cookie = act_cookie; act_stats->counter = counter; + mlx5_fc_query_cached_raw(counter, + &act_stats->lastbytes, + &act_stats->lastpackets, &lastused); + rcu_read_lock(); old_act_stats = rhashtable_lookup_get_insert_fast(ht, &act_stats->hash, -- cgit From 1166add424dae14ccdb64a6eefbf26766c9d0ef2 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Tue, 21 Feb 2023 14:46:56 +0000 Subject: net/mlx5e: TC, fix missing error code Missing error code when mlx5e_tc_act_stats_create fails Fixes: d13674b1d14c ("net/mlx5e: TC, map tc action cookie to a hw counter") Reported-by: Dan Carpenter Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cc35cbc9934d..d2e191ee0704 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5305,8 +5305,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs)); tc->action_stats_handle = mlx5e_tc_act_stats_create(); - if (IS_ERR(tc->action_stats_handle)) + if (IS_ERR(tc->action_stats_handle)) { + err = PTR_ERR(tc->action_stats_handle); goto err_act_stats; + } return 0; @@ -5441,8 +5443,10 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) } uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create(); - if (IS_ERR(uplink_priv->action_stats_handle)) + if (IS_ERR(uplink_priv->action_stats_handle)) { + err = PTR_ERR(uplink_priv->action_stats_handle); goto err_action_counter; + } return 0; -- cgit From b23bf10cca59b2955fa5c2b5e4f753962b4f88ca Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Tue, 21 Feb 2023 15:24:39 +0000 Subject: net/mlx5e: TC, fix cloned flow attribute Currently the cloned flow attr resets the original tc action cookies count. Fix that by resetting the cloned flow attribute. Fixes: cca7eac13856 ("net/mlx5e: TC, store tc action cookies per attr") Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d2e191ee0704..6bfed633343a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3752,7 +3752,7 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, parse_attr->filter_dev = attr->parse_attr->filter_dev; attr2->action = 0; attr2->counter = NULL; - attr->tc_act_cookies_count = 0; + attr2->tc_act_cookies_count = 0; attr2->flags = 0; attr2->parse_attr = parse_attr; attr2->dest_chain = 0; -- cgit From c7b7c64ab5821352db0b3fbaa92773e5a60bfaa7 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 22 Feb 2023 10:03:36 +0000 Subject: net/mlx5e: TC, Remove error message log print The cited commit attempts to update the hw stats when dumping tc actions. However, the driver may be called to update the stats of a police action that may not be in hardware. In such cases the driver will fail to lookup the police action object and will output an error message both to extack and dmesg. The dmesg error is confusing as it may not indicate an actual error. Remove the dmesg error. Fixes: 2b68d659a704 ("net/mlx5e: TC, support per action stats") Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c index c4378afdec09..1bd1c94fb977 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -178,7 +178,6 @@ tc_act_police_stats(struct mlx5e_priv *priv, meter = mlx5e_tc_meter_get(priv->mdev, ¶ms); if (IS_ERR(meter)) { NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter"); - mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index); return PTR_ERR(meter); } -- cgit From 6acd352dfee558194643adbed7e849fe80fd1b93 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Sat, 18 Mar 2023 02:25:38 +0800 Subject: io_uring: rsrc: Optimize return value variable 'ret' The initialization assignment of the variable ret is changed to 0, only in 'goto fail;' Use the ret variable as the function return value. Signed-off-by: Li zeming Link: https://lore.kernel.org/r/20230317182538.3027-1-zeming@nfschina.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 056f40946ff6..09a16d709cb5 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -410,7 +410,7 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, unsigned nr, struct io_rsrc_data **pdata) { struct io_rsrc_data *data; - int ret = -ENOMEM; + int ret = 0; unsigned i; data = kzalloc(sizeof(*data), GFP_KERNEL); -- cgit From a5bb73b3f5db1a4e91402ad132b59b13d2651ed9 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 14 Mar 2023 02:31:45 -0700 Subject: hwmon: (adm1266) Set `can_sleep` flag for GPIO chip The adm1266 driver uses I2C bus access in its GPIO chip `set` and `get` implementation. This means these functions can sleep and the GPIO chip should set the `can_sleep` property to true. This will ensure that a warning is printed when trying to set or get the GPIO value from a context that potentially can't sleep. Fixes: d98dfad35c38 ("hwmon: (pmbus/adm1266) Add support for GPIOs") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20230314093146.2443845-1-lars@metafoo.de Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/adm1266.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c index ec5f932fc6f0..1ac2b2f4c570 100644 --- a/drivers/hwmon/pmbus/adm1266.c +++ b/drivers/hwmon/pmbus/adm1266.c @@ -301,6 +301,7 @@ static int adm1266_config_gpio(struct adm1266_data *data) data->gc.label = name; data->gc.parent = &data->client->dev; data->gc.owner = THIS_MODULE; + data->gc.can_sleep = true; data->gc.base = -1; data->gc.names = data->gpio_names; data->gc.ngpio = ARRAY_SIZE(data->gpio_names); -- cgit From ab00709310eedcd8dae0df1f66d332f9bc64c99e Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 14 Mar 2023 02:31:46 -0700 Subject: hwmon: (ltc2992) Set `can_sleep` flag for GPIO chip The ltc2992 drivers uses a mutex and I2C bus access in its GPIO chip `set` and `get` implementation. This means these functions can sleep and the GPIO chip should set the `can_sleep` property to true. This will ensure that a warning is printed when trying to set or get the GPIO value from a context that potentially can't sleep. Fixes: 9ca26df1ba25 ("hwmon: (ltc2992) Add support for GPIOs.") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20230314093146.2443845-2-lars@metafoo.de Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2992.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 88514152d930..69341de397cb 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -323,6 +323,7 @@ static int ltc2992_config_gpio(struct ltc2992_state *st) st->gc.label = name; st->gc.parent = &st->client->dev; st->gc.owner = THIS_MODULE; + st->gc.can_sleep = true; st->gc.base = -1; st->gc.names = st->gpio_names; st->gc.ngpio = ARRAY_SIZE(st->gpio_names); -- cgit From 7c10131803e45269ddc6c817f19ed649110f3cae Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Tue, 14 Mar 2023 10:33:51 -0500 Subject: veth: Fix use after free in XDP_REDIRECT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 718a18a0c8a6 ("veth: Rework veth_xdp_rcv_skb in order to accept non-linear skb") introduced a bug where it tried to use pskb_expand_head() if the headroom was less than XDP_PACKET_HEADROOM. This however uses kmalloc to expand the head, which will later allow consume_skb() to free the skb while is it still in use by AF_XDP. Previously if the headroom was less than XDP_PACKET_HEADROOM we continued on to allocate a new skb from pages so this restores that behavior. BUG: KASAN: use-after-free in __xsk_rcv+0x18d/0x2c0 Read of size 78 at addr ffff888976250154 by task napi/iconduit-g/148640 CPU: 5 PID: 148640 Comm: napi/iconduit-g Kdump: loaded Tainted: G O 6.1.4-cloudflare-kasan-2023.1.2 #1 Hardware name: Quanta Computer Inc. QuantaPlex T41S-2U/S2S-MB, BIOS S2S_3B10.03 06/21/2018 Call Trace: dump_stack_lvl+0x34/0x48 print_report+0x170/0x473 ? __xsk_rcv+0x18d/0x2c0 kasan_report+0xad/0x130 ? __xsk_rcv+0x18d/0x2c0 kasan_check_range+0x149/0x1a0 memcpy+0x20/0x60 __xsk_rcv+0x18d/0x2c0 __xsk_map_redirect+0x1f3/0x490 ? veth_xdp_rcv_skb+0x89c/0x1ba0 [veth] xdp_do_redirect+0x5ca/0xd60 veth_xdp_rcv_skb+0x935/0x1ba0 [veth] ? __netif_receive_skb_list_core+0x671/0x920 ? veth_xdp+0x670/0x670 [veth] veth_xdp_rcv+0x304/0xa20 [veth] ? do_xdp_generic+0x150/0x150 ? veth_xdp_rcv_one+0xde0/0xde0 [veth] ? _raw_spin_lock_bh+0xe0/0xe0 ? newidle_balance+0x887/0xe30 ? __perf_event_task_sched_in+0xdb/0x800 veth_poll+0x139/0x571 [veth] ? veth_xdp_rcv+0xa20/0xa20 [veth] ? _raw_spin_unlock+0x39/0x70 ? finish_task_switch.isra.0+0x17e/0x7d0 ? __switch_to+0x5cf/0x1070 ? __schedule+0x95b/0x2640 ? io_schedule_timeout+0x160/0x160 __napi_poll+0xa1/0x440 napi_threaded_poll+0x3d1/0x460 ? __napi_poll+0x440/0x440 ? __kthread_parkme+0xc6/0x1f0 ? __napi_poll+0x440/0x440 kthread+0x2a2/0x340 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Freed by task 148640: kasan_save_stack+0x23/0x50 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 ____kasan_slab_free+0x169/0x1d0 slab_free_freelist_hook+0xd2/0x190 __kmem_cache_free+0x1a1/0x2f0 skb_release_data+0x449/0x600 consume_skb+0x9f/0x1c0 veth_xdp_rcv_skb+0x89c/0x1ba0 [veth] veth_xdp_rcv+0x304/0xa20 [veth] veth_poll+0x139/0x571 [veth] __napi_poll+0xa1/0x440 napi_threaded_poll+0x3d1/0x460 kthread+0x2a2/0x340 ret_from_fork+0x22/0x30 The buggy address belongs to the object at ffff888976250000 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 340 bytes inside of 2048-byte region [ffff888976250000, ffff888976250800) The buggy address belongs to the physical page: page:00000000ae18262a refcount:2 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x976250 head:00000000ae18262a order:3 compound_mapcount:0 compound_pincount:0 flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) raw: 002ffff800010200 0000000000000000 dead000000000122 ffff88810004cf00 raw: 0000000000000000 0000000080080008 00000002ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888976250000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888976250080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ffff888976250100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888976250180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888976250200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 718a18a0c8a6 ("veth: Rework veth_xdp_rcv_skb in order to accept non-linear skb") Signed-off-by: Shawn Bohrer Acked-by: Lorenzo Bianconi Acked-by: Toshiaki Makita Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20230314153351.2201328-1-sbohrer@cloudflare.com Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 4da74ac27f9a..a30a66cace14 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -708,7 +708,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, u32 frame_sz; if (skb_shared(skb) || skb_head_is_locked(skb) || - skb_shinfo(skb)->nr_frags) { + skb_shinfo(skb)->nr_frags || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { u32 size, len, max_head_size, off; struct sk_buff *nskb; struct page *page; @@ -773,9 +774,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, consume_skb(skb); skb = nskb; - } else if (skb_headroom(skb) < XDP_PACKET_HEADROOM && - pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) { - goto drop; } /* SKB "head" area always have tailroom for skb_shared_info */ -- cgit From cd356010ce4c69ac7e1a40586112df24d22c6a4b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Mar 2023 17:30:25 +0200 Subject: net: phy: mscc: fix deadlock in phy_ethtool_{get,set}_wol() Since the blamed commit, phy_ethtool_get_wol() and phy_ethtool_set_wol() acquire phydev->lock, but the mscc phy driver implementations, vsc85xx_wol_get() and vsc85xx_wol_set(), acquire the same lock as well, resulting in a deadlock. $ ip link set swp3 down ============================================ WARNING: possible recursive locking detected mscc_felix 0000:00:00.5 swp3: Link is Down -------------------------------------------- ip/375 is trying to acquire lock: ffff3d7e82e987a8 (&dev->lock){+.+.}-{4:4}, at: vsc85xx_wol_get+0x2c/0xf4 but task is already holding lock: ffff3d7e82e987a8 (&dev->lock){+.+.}-{4:4}, at: phy_ethtool_get_wol+0x3c/0x6c other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&dev->lock); lock(&dev->lock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by ip/375: #0: ffffd43b2a955788 (rtnl_mutex){+.+.}-{4:4}, at: rtnetlink_rcv_msg+0x144/0x58c #1: ffff3d7e82e987a8 (&dev->lock){+.+.}-{4:4}, at: phy_ethtool_get_wol+0x3c/0x6c Call trace: __mutex_lock+0x98/0x454 mutex_lock_nested+0x2c/0x38 vsc85xx_wol_get+0x2c/0xf4 phy_ethtool_get_wol+0x50/0x6c phy_suspend+0x84/0xcc phy_state_machine+0x1b8/0x27c phy_stop+0x70/0x154 phylink_stop+0x34/0xc0 dsa_port_disable_rt+0x2c/0xa4 dsa_slave_close+0x38/0xec __dev_close_many+0xc8/0x16c __dev_change_flags+0xdc/0x218 dev_change_flags+0x24/0x6c do_setlink+0x234/0xea4 __rtnl_newlink+0x46c/0x878 rtnl_newlink+0x50/0x7c rtnetlink_rcv_msg+0x16c/0x58c Removing the mutex_lock(&phydev->lock) calls from the driver restores the functionality. Fixes: 2f987d486610 ("net: phy: Add locks to ethtool functions") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230314153025.2372970-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc_main.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 8a13b1ad9a33..62bf99e45af1 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -280,12 +280,9 @@ static int vsc85xx_wol_set(struct phy_device *phydev, u16 pwd[3] = {0, 0, 0}; struct ethtool_wolinfo *wol_conf = wol; - mutex_lock(&phydev->lock); rc = phy_select_page(phydev, MSCC_PHY_PAGE_EXTENDED_2); - if (rc < 0) { - rc = phy_restore_page(phydev, rc, rc); - goto out_unlock; - } + if (rc < 0) + return phy_restore_page(phydev, rc, rc); if (wol->wolopts & WAKE_MAGIC) { /* Store the device address for the magic packet */ @@ -323,7 +320,7 @@ static int vsc85xx_wol_set(struct phy_device *phydev, rc = phy_restore_page(phydev, rc, rc > 0 ? 0 : rc); if (rc < 0) - goto out_unlock; + return rc; if (wol->wolopts & WAKE_MAGIC) { /* Enable the WOL interrupt */ @@ -331,22 +328,19 @@ static int vsc85xx_wol_set(struct phy_device *phydev, reg_val |= MII_VSC85XX_INT_MASK_WOL; rc = phy_write(phydev, MII_VSC85XX_INT_MASK, reg_val); if (rc) - goto out_unlock; + return rc; } else { /* Disable the WOL interrupt */ reg_val = phy_read(phydev, MII_VSC85XX_INT_MASK); reg_val &= (~MII_VSC85XX_INT_MASK_WOL); rc = phy_write(phydev, MII_VSC85XX_INT_MASK, reg_val); if (rc) - goto out_unlock; + return rc; } /* Clear WOL iterrupt status */ reg_val = phy_read(phydev, MII_VSC85XX_INT_STATUS); -out_unlock: - mutex_unlock(&phydev->lock); - - return rc; + return 0; } static void vsc85xx_wol_get(struct phy_device *phydev, @@ -358,10 +352,9 @@ static void vsc85xx_wol_get(struct phy_device *phydev, u16 pwd[3] = {0, 0, 0}; struct ethtool_wolinfo *wol_conf = wol; - mutex_lock(&phydev->lock); rc = phy_select_page(phydev, MSCC_PHY_PAGE_EXTENDED_2); if (rc < 0) - goto out_unlock; + goto out_restore_page; reg_val = __phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL); if (reg_val & SECURE_ON_ENABLE) @@ -377,9 +370,8 @@ static void vsc85xx_wol_get(struct phy_device *phydev, } } -out_unlock: +out_restore_page: phy_restore_page(phydev, rc, rc > 0 ? 0 : rc); - mutex_unlock(&phydev->lock); } #if IS_ENABLED(CONFIG_OF_MDIO) -- cgit From a075bacde257f755bea0e53400c9f1cdd1b8e8e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 14 Mar 2023 16:31:32 -0700 Subject: fsverity: don't drop pagecache at end of FS_IOC_ENABLE_VERITY The full pagecache drop at the end of FS_IOC_ENABLE_VERITY is causing performance problems and is hindering adoption of fsverity. It was intended to solve a race condition where unverified pages might be left in the pagecache. But actually it doesn't solve it fully. Since the incomplete solution for this race condition has too much performance impact for it to be worth it, let's remove it for now. Fixes: 3fda4c617e84 ("fs-verity: implement FS_IOC_ENABLE_VERITY ioctl") Cc: stable@vger.kernel.org Reviewed-by: Victor Hsieh Link: https://lore.kernel.org/r/20230314235332.50270-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/verity/enable.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/verity/enable.c b/fs/verity/enable.c index e13db6507b38..7a0e3a84d370 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,7 +8,6 @@ #include "fsverity_private.h" #include -#include #include #include @@ -367,25 +366,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) goto out_drop_write; err = enable_verity(filp, &arg); - if (err) - goto out_allow_write_access; /* - * Some pages of the file may have been evicted from pagecache after - * being used in the Merkle tree construction, then read into pagecache - * again by another process reading from the file concurrently. Since - * these pages didn't undergo verification against the file digest which - * fs-verity now claims to be enforcing, we have to wipe the pagecache - * to ensure that all future reads are verified. + * We no longer drop the inode's pagecache after enabling verity. This + * used to be done to try to avoid a race condition where pages could be + * evicted after being used in the Merkle tree construction, then + * re-instantiated by a concurrent read. Such pages are unverified, and + * the backing storage could have filled them with different content, so + * they shouldn't be used to fulfill reads once verity is enabled. + * + * But, dropping the pagecache has a big performance impact, and it + * doesn't fully solve the race condition anyway. So for those reasons, + * and also because this race condition isn't very important relatively + * speaking (especially for small-ish files, where the chance of a page + * being used, evicted, *and* re-instantiated all while enabling verity + * is quite small), we no longer drop the inode's pagecache. */ - filemap_write_and_wait(inode->i_mapping); - invalidate_inode_pages2(inode->i_mapping); /* * allow_write_access() is needed to pair with deny_write_access(). * Regardless, the filesystem won't allow writing to verity files. */ -out_allow_write_access: allow_write_access(filp); out_drop_write: mnt_drop_write_file(filp); -- cgit From 5bc9e2d43f86105a95f86fa096fb4e517bb0ce73 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Tue, 14 Mar 2023 23:58:05 +0100 Subject: ata: pata_parport: fix memory leaks When ida_alloc() fails, "pi" is not freed although the misleading comment says otherwise. Move the ida_alloc() call up so we really don't have to free "pi" in case of ida_alloc() failure. Also move ida_free() call from pi_remove_one() to pata_parport_dev_release(). It was dereferencing already freed dev pointer. Testing revealed leak even in non-failure case which was tracked down to missing put_device() call after bus_find_device_by_name(). As a result, pata_parport_dev_release() was never called. Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202303111822.IHNchbkp-lkp@intel.com/ Signed-off-by: Ondrej Zary Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/pata_parport.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c index 31c9677a45e3..c1576d943b43 100644 --- a/drivers/ata/pata_parport/pata_parport.c +++ b/drivers/ata/pata_parport/pata_parport.c @@ -381,6 +381,7 @@ static void pata_parport_dev_release(struct device *dev) { struct pi_adapter *pi = container_of(dev, struct pi_adapter, dev); + ida_free(&pata_parport_bus_dev_ids, dev->id); kfree(pi); } @@ -433,23 +434,27 @@ static struct pi_adapter *pi_init_one(struct parport *parport, if (bus_for_each_dev(&pata_parport_bus_type, NULL, &match, pi_find_dev)) return NULL; + id = ida_alloc(&pata_parport_bus_dev_ids, GFP_KERNEL); + if (id < 0) + return NULL; + pi = kzalloc(sizeof(struct pi_adapter), GFP_KERNEL); - if (!pi) + if (!pi) { + ida_free(&pata_parport_bus_dev_ids, id); return NULL; + } /* set up pi->dev before pi_probe_unit() so it can use dev_printk() */ pi->dev.parent = &pata_parport_bus; pi->dev.bus = &pata_parport_bus_type; pi->dev.driver = &pr->driver; pi->dev.release = pata_parport_dev_release; - id = ida_alloc(&pata_parport_bus_dev_ids, GFP_KERNEL); - if (id < 0) - return NULL; /* pata_parport_dev_release will do kfree(pi) */ pi->dev.id = id; dev_set_name(&pi->dev, "pata_parport.%u", pi->dev.id); if (device_register(&pi->dev)) { put_device(&pi->dev); - goto out_ida_free; + /* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */ + return NULL; } pi->proto = pr; @@ -464,8 +469,7 @@ static struct pi_adapter *pi_init_one(struct parport *parport, pi->port = parport->base; par_cb.private = pi; - pi->pardev = parport_register_dev_model(parport, DRV_NAME, &par_cb, - pi->dev.id); + pi->pardev = parport_register_dev_model(parport, DRV_NAME, &par_cb, id); if (!pi->pardev) goto out_module_put; @@ -501,8 +505,7 @@ out_module_put: module_put(pi->proto->owner); out_unreg_dev: device_unregister(&pi->dev); -out_ida_free: - ida_free(&pata_parport_bus_dev_ids, pi->dev.id); + /* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */ return NULL; } @@ -627,8 +630,7 @@ static void pi_remove_one(struct device *dev) pi_disconnect(pi); pi_release(pi); device_unregister(dev); - ida_free(&pata_parport_bus_dev_ids, dev->id); - /* pata_parport_dev_release will do kfree(pi) */ + /* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */ } static ssize_t delete_device_store(struct bus_type *bus, const char *buf, @@ -644,6 +646,7 @@ static ssize_t delete_device_store(struct bus_type *bus, const char *buf, } pi_remove_one(dev); + put_device(dev); mutex_unlock(&pi_mutex); return count; -- cgit From 7ad2c39860dc0ca01d2152232224d2124e160fe3 Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Thu, 16 Mar 2023 16:39:54 +0800 Subject: xen: remove unnecessary (void*) conversions Pointer variables of void * type do not require type cast. Signed-off-by: Yu Zhe Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20230316083954.4223-1-yuzhe@nfschina.com Signed-off-by: Juergen Gross --- drivers/xen/xenfs/xensyms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c index c6c73a33c44d..b799bc759c15 100644 --- a/drivers/xen/xenfs/xensyms.c +++ b/drivers/xen/xenfs/xensyms.c @@ -64,7 +64,7 @@ static int xensyms_next_sym(struct xensyms *xs) static void *xensyms_start(struct seq_file *m, loff_t *pos) { - struct xensyms *xs = (struct xensyms *)m->private; + struct xensyms *xs = m->private; xs->op.u.symdata.symnum = *pos; @@ -76,7 +76,7 @@ static void *xensyms_start(struct seq_file *m, loff_t *pos) static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos) { - struct xensyms *xs = (struct xensyms *)m->private; + struct xensyms *xs = m->private; xs->op.u.symdata.symnum = ++(*pos); @@ -88,7 +88,7 @@ static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos) static int xensyms_show(struct seq_file *m, void *p) { - struct xensyms *xs = (struct xensyms *)m->private; + struct xensyms *xs = m->private; struct xenpf_symdata *symdata = &xs->op.u.symdata; seq_printf(m, "%016llx %c %s\n", symdata->address, @@ -120,7 +120,7 @@ static int xensyms_open(struct inode *inode, struct file *file) return ret; m = file->private_data; - xs = (struct xensyms *)m->private; + xs = m->private; xs->namelen = XEN_KSYM_NAME_LEN + 1; xs->name = kzalloc(xs->namelen, GFP_KERNEL); @@ -138,7 +138,7 @@ static int xensyms_open(struct inode *inode, struct file *file) static int xensyms_release(struct inode *inode, struct file *file) { struct seq_file *m = file->private_data; - struct xensyms *xs = (struct xensyms *)m->private; + struct xensyms *xs = m->private; kfree(xs->name); return seq_release_private(inode, file); -- cgit From cbebd68f59f03633469f3ecf9bea99cd6cce3854 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 6 Mar 2023 08:06:56 -0800 Subject: x86/mm: Fix use of uninitialized buffer in sme_enable() cmdline_find_option() may fail before doing any initialization of the buffer array. This may lead to unpredictable results when the same buffer is used later in calls to strncmp() function. Fix the issue by returning early if cmdline_find_option() returns an error. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Signed-off-by: Nikita Zhandarovich Signed-off-by: Borislav Petkov (AMD) Acked-by: Tom Lendacky Cc: Link: https://lore.kernel.org/r/20230306160656.14844-1-n.zhandarovich@fintech.ru --- arch/x86/mm/mem_encrypt_identity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 88cccd65029d..c6efcf559d88 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -600,7 +600,8 @@ void __init sme_enable(struct boot_params *bp) cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | ((u64)bp->ext_cmd_line_ptr << 32)); - cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); + if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) + return; if (!strncmp(buffer, cmdline_on, sizeof(buffer))) sme_me_mask = me_mask; -- cgit From c3aa32ac86fe5f27659f07474995ec743a3251b0 Mon Sep 17 00:00:00 2001 From: Hongren Zheng Date: Thu, 16 Mar 2023 01:31:53 +0800 Subject: MAINTAINERS: make me a reviewer of USB/IP I think I am familiar enough with USB/IP and is adequate as a reviewer. Every time there is some patch/bug, I wish I can get pinged and I will feedback on that. I had some contributions to USBIP and some support for it. Contribution: Commit 17af79321 ("docs: usbip: Fix major fields and descriptions in protocol") Commit b737eecd4 ("usbip: tools: add options and examples in man page related to device mode") Commit a58977b2f ("usbip: tools: add usage of device mode in usbip_list.c") Support: Commit 8f36b3b4e1 ("usbip: add USBIP_URB_* URB transfer flags") Bug report: https://lore.kernel.org/lkml/ZBHxfUX60EyCMw5l@Sun/ I also have implemented a userspace usbip server in https://github.com/canokeys/canokey-usbip and maintain a list of usbip implementations https://github.com/usbip/implementations Signed-off-by: Hongren (Zenithal) Zheng Acked-by: Shuah Khan Link: https://lore.kernel.org/r/ZBIBCRiFGSqQcOon@Sun Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f305..110944cea89b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21651,6 +21651,7 @@ USB OVER IP DRIVER M: Valentina Manea M: Shuah Khan M: Shuah Khan +R: Hongren Zheng L: linux-usb@vger.kernel.org S: Maintained F: Documentation/usb/usbip_protocol.rst -- cgit From 5bc38d33a5a1209fd4de65101d1ae8255ea12c6e Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 7 Mar 2023 06:14:20 -0500 Subject: usb: cdnsp: Fixes issue with redundant Status Stage In some cases, driver trees to send Status Stage twice. The first one from upper layer of gadget usb subsystem and second time from controller driver. This patch fixes this issue and remove tricky handling of SET_INTERFACE from controller driver which is no longer needed. cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20230307111420.376056-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ep0.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c index 9b8325f82499..d63d5d92f255 100644 --- a/drivers/usb/cdns3/cdnsp-ep0.c +++ b/drivers/usb/cdns3/cdnsp-ep0.c @@ -403,20 +403,6 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev, case USB_REQ_SET_ISOCH_DELAY: ret = cdnsp_ep0_set_isoch_delay(pdev, ctrl); break; - case USB_REQ_SET_INTERFACE: - /* - * Add request into pending list to block sending status stage - * by libcomposite. - */ - list_add_tail(&pdev->ep0_preq.list, - &pdev->ep0_preq.pep->pending_list); - - ret = cdnsp_ep0_delegate_req(pdev, ctrl); - if (ret == -EBUSY) - ret = 0; - - list_del(&pdev->ep0_preq.list); - break; default: ret = cdnsp_ep0_delegate_req(pdev, ctrl); break; @@ -474,9 +460,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) else ret = cdnsp_ep0_delegate_req(pdev, ctrl); - if (!len) - pdev->ep0_stage = CDNSP_STATUS_STAGE; - if (ret == USB_GADGET_DELAYED_STATUS) { trace_cdnsp_ep0_status_stage("delayed"); return; @@ -484,6 +467,6 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) out: if (ret < 0) cdnsp_ep0_stall(pdev); - else if (pdev->ep0_stage == CDNSP_STATUS_STAGE) + else if (!len && pdev->ep0_stage != CDNSP_STATUS_STAGE) cdnsp_status_stage(pdev); } -- cgit From 1272fd652a226ccb34e9f47371b6121948048438 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Wed, 8 Mar 2023 07:44:27 -0500 Subject: usb: cdns3: Fix issue with using incorrect PCI device function PCI based platform can have more than two PCI functions. USBSS PCI Glue driver during initialization should consider only DRD/HOST/DEVICE PCI functions and all other should be ignored. This patch adds additional condition which causes that only DRD and HOST/DEVICE function will be accepted. cc: Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20230308124427.311245-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-pci-wrap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-pci-wrap.c b/drivers/usb/cdns3/cdns3-pci-wrap.c index deeea618ba33..1f6320d98a76 100644 --- a/drivers/usb/cdns3/cdns3-pci-wrap.c +++ b/drivers/usb/cdns3/cdns3-pci-wrap.c @@ -60,6 +60,11 @@ static struct pci_dev *cdns3_get_second_fun(struct pci_dev *pdev) return NULL; } + if (func->devfn != PCI_DEV_FN_HOST_DEVICE && + func->devfn != PCI_DEV_FN_OTG) { + return NULL; + } + return func; } -- cgit From 96b96b2a567fb34dd41c87e6cf01f6902ce8cae4 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 9 Mar 2023 01:30:48 -0500 Subject: usb: cdnsp: changes PCI Device ID to fix conflict with CNDS3 driver Patch changes CDNS_DEVICE_ID in USBSSP PCI Glue driver to remove the conflict with Cadence USBSS driver. cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20230309063048.299378-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-pci.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c index efd54ed918b9..7b151f5af3cc 100644 --- a/drivers/usb/cdns3/cdnsp-pci.c +++ b/drivers/usb/cdns3/cdnsp-pci.c @@ -29,30 +29,23 @@ #define PLAT_DRIVER_NAME "cdns-usbssp" #define CDNS_VENDOR_ID 0x17cd -#define CDNS_DEVICE_ID 0x0100 +#define CDNS_DEVICE_ID 0x0200 +#define CDNS_DRD_ID 0x0100 #define CDNS_DRD_IF (PCI_CLASS_SERIAL_USB << 8 | 0x80) static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev) { - struct pci_dev *func; - /* * Gets the second function. - * It's little tricky, but this platform has two function. - * The fist keeps resources for Host/Device while the second - * keeps resources for DRD/OTG. + * Platform has two function. The fist keeps resources for + * Host/Device while the secon keeps resources for DRD/OTG. */ - func = pci_get_device(pdev->vendor, pdev->device, NULL); - if (!func) - return NULL; + if (pdev->device == CDNS_DEVICE_ID) + return pci_get_device(pdev->vendor, CDNS_DRD_ID, NULL); + else if (pdev->device == CDNS_DRD_ID) + return pci_get_device(pdev->vendor, CDNS_DEVICE_ID, NULL); - if (func->devfn == pdev->devfn) { - func = pci_get_device(pdev->vendor, pdev->device, func); - if (!func) - return NULL; - } - - return func; + return NULL; } static int cdnsp_pci_probe(struct pci_dev *pdev, @@ -230,6 +223,8 @@ static const struct pci_device_id cdnsp_pci_ids[] = { PCI_CLASS_SERIAL_USB_DEVICE, PCI_ANY_ID }, { PCI_VENDOR_ID_CDNS, CDNS_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, CDNS_DRD_IF, PCI_ANY_ID }, + { PCI_VENDOR_ID_CDNS, CDNS_DRD_ID, PCI_ANY_ID, PCI_ANY_ID, + CDNS_DRD_IF, PCI_ANY_ID }, { 0, } }; -- cgit From a37eb61b6ec064ac794b8a1e89fd33eb582fe51d Mon Sep 17 00:00:00 2001 From: Yaroslav Furman Date: Sun, 12 Mar 2023 11:07:45 +0200 Subject: uas: Add US_FL_NO_REPORT_OPCODES for JMicron JMS583Gen 2 Just like other JMicron JMS5xx enclosures, it chokes on report-opcodes, let's avoid them. Signed-off-by: Yaroslav Furman Cc: stable Link: https://lore.kernel.org/r/20230312090745.47962-1-yaro330@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index c7b763d6d102..1f8c9b16a0fb 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -111,6 +111,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA), +/* Reported by: Yaroslav Furman */ +UNUSUAL_DEV(0x152d, 0x0583, 0x0000, 0x9999, + "JMicron", + "JMS583Gen 2", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_OPCODES), + /* Reported-by: Thinh Nguyen */ UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999, "PNY", -- cgit From bbf860ed710bacc0279c4cda2817f70e1200d04b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Mar 2023 17:45:22 +0200 Subject: usb: gadget: Use correct endianness of the wLength field for WebUSB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WebUSB code uses wLength directly without proper endianness conversion. Update it to use already prepared temporary variable w_length instead. Fixes: 93c473948c58 ("usb: gadget: add WebUSB landing page support") Signed-off-by: Andy Shevchenko Tested-By: Jó Ágila Bitsch Link: https://lore.kernel.org/r/20230313154522.52684-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index fa7dd6cf014d..5377d873c08e 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2079,10 +2079,9 @@ unknown: sizeof(url_descriptor->URL) - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset); - if (ctrl->wLength < WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH - + landing_page_length) - landing_page_length = ctrl->wLength - - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset; + if (w_length < WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_length) + landing_page_length = w_length + - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + landing_page_offset; memcpy(url_descriptor->URL, cdev->landing_page + landing_page_offset, -- cgit From 5da28edd7bd5518f97175ecea77615bb729a7a28 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Mar 2023 12:11:42 +0000 Subject: io_uring/msg_ring: let target know allocated index msg_ring requests transferring files support auto index selection via IORING_FILE_INDEX_ALLOC, however they don't return the selected index to the target ring and there is no other good way for the userspace to know where is the receieved file. Return the index for allocated slots and 0 otherwise, which is consistent with other fixed file installing requests. Cc: stable@vger.kernel.org # v6.0+ Fixes: e6130eba8a848 ("io_uring: add support for passing fixed file descriptors") Signed-off-by: Pavel Begunkov Link: https://github.com/axboe/liburing/issues/809 Signed-off-by: Jens Axboe --- io_uring/msg_ring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 8803c0979e2a..85fd7ce5f05b 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -202,7 +202,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag * completes with -EOVERFLOW, then the sender must ensure that a * later IORING_OP_MSG_RING delivers the message. */ - if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0)) + if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0)) ret = -EOVERFLOW; out_unlock: io_double_unlock_ctx(target_ctx); @@ -229,6 +229,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags) struct io_ring_ctx *ctx = req->ctx; struct file *src_file = msg->src_file; + if (msg->len) + return -EINVAL; if (target_ctx == ctx) return -EINVAL; if (target_ctx->flags & IORING_SETUP_R_DISABLED) -- cgit From 81f59a26f3d59c6aeb137b7b5546848779222c65 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 16 Mar 2023 00:50:17 +0900 Subject: kbuild: rpm-pkg: move source components to rpmbuild/SOURCES Prepare to add more files to the source RPM. Also, fix the build error when KCONFIG_CONFIG is set: error: Bad file: ./.config: No such file or directory Signed-off-by: Masahiro Yamada --- .gitignore | 1 + Makefile | 2 +- scripts/Makefile.package | 2 +- scripts/package/mkspec | 12 ++++++++---- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 8fe465f251c0..70ec6037fa7a 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,7 @@ modules.order # RPM spec file (make rpm-pkg) # /*.spec +/rpmbuild/ # # Debian directory (make deb-pkg) diff --git a/Makefile b/Makefile index d0a0ba8e5a2e..dfff9f8d28e5 100644 --- a/Makefile +++ b/Makefile @@ -1605,7 +1605,7 @@ MRPROPER_FILES += include/config include/generated \ certs/signing_key.pem \ certs/x509.genkey \ vmlinux-gdb.py \ - *.spec \ + *.spec rpmbuild \ rust/libmacros.so # clean - Delete most, but leave enough to build external modules diff --git a/scripts/Makefile.package b/scripts/Makefile.package index b941e6341b36..a0355bdeebff 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -89,7 +89,7 @@ PHONY += srcrpm-pkg srcrpm-pkg: linux.tar.gz $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -bs kernel.spec \ - --define='_smp_mflags %{nil}' --define='_sourcedir .' --define='_srcrpmdir .' + --define='_smp_mflags %{nil}' --define='_sourcedir rpmbuild/SOURCES' --define='_srcrpmdir .' # binrpm-pkg # --------------------------------------------------------------------------- diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 3c550960dd39..5f007137f5a0 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -15,15 +15,19 @@ if [ "$1" = prebuilt ]; then MAKE="$MAKE -f $srctree/Makefile" else S= + + mkdir -p rpmbuild/SOURCES + cp linux.tar.gz rpmbuild/SOURCES + cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config fi -if grep -q CONFIG_MODULES=y .config; then +if grep -q CONFIG_MODULES=y include/config/auto.conf; then M= else M=DEL fi -if grep -q CONFIG_DRM=y .config; then +if grep -q CONFIG_DRM=y include/config/auto.conf; then PROVIDES=kernel-drm fi @@ -48,7 +52,7 @@ sed -e '/^DEL/d' -e 's/^\t*//' < Date: Thu, 16 Mar 2023 00:50:18 +0900 Subject: kbuild: use git-archive for source package creation Commit 5c3d1d0abb12 ("kbuild: add a tool to list files ignored by git") added a new tool, scripts/list-gitignored. My intention was to create source packages without cleaning the source tree, without relying on git. Linus strongly objected to it, and suggested using 'git archive' instead. [1] [2] [3] This commit goes in that direction - Remove scripts/list-gitignored.c and rewrites Makefiles and scripts to use 'git archive' for building Debian and RPM source packages. It also makes 'make perf-tar*-src-pkg' use 'git archive' again. Going forward, building source packages is only possible in a git-managed tree. Building binary packages does not require git. [1]: https://lore.kernel.org/lkml/CAHk-=wi49sMaC7vY1yMagk7eqLK=1jHeHQ=yZ_k45P=xBccnmA@mail.gmail.com/ [2]: https://lore.kernel.org/lkml/CAHk-=wh5AixGsLeT0qH2oZHKq0FLUTbyTw4qY921L=PwYgoGVw@mail.gmail.com/ [3]: https://lore.kernel.org/lkml/CAHk-=wgM-W6Fu==EoAVCabxyX8eYBz9kNC88-tm9ExRQwA79UQ@mail.gmail.com/ Fixes: 5c3d1d0abb12 ("kbuild: add a tool to list files ignored by git") Fixes: e0ca16749ac3 ("kbuild: make perf-tar*-src-pkg work without relying on git") Suggested-by: Linus Torvalds Signed-off-by: Masahiro Yamada --- Makefile | 7 +- scripts/.gitignore | 1 - scripts/Makefile | 2 +- scripts/Makefile.package | 146 +++--- scripts/check-git | 14 + scripts/list-gitignored.c | 1057 ---------------------------------------- scripts/package/gen-diff-patch | 44 ++ scripts/package/mkdebian | 10 +- scripts/package/mkspec | 10 + scripts/setlocalversion | 45 +- 10 files changed, 181 insertions(+), 1155 deletions(-) create mode 100755 scripts/check-git delete mode 100644 scripts/list-gitignored.c create mode 100755 scripts/package/gen-diff-patch diff --git a/Makefile b/Makefile index dfff9f8d28e5..0de1288dc451 100644 --- a/Makefile +++ b/Makefile @@ -274,8 +274,7 @@ no-dot-config-targets := $(clean-targets) \ cscope gtags TAGS tags help% %docs check% coccicheck \ $(version_h) headers headers_% archheaders archscripts \ %asm-generic kernelversion %src-pkg dt_binding_check \ - outputmakefile rustavailable rustfmt rustfmtcheck \ - scripts_package + outputmakefile rustavailable rustfmt rustfmtcheck # Installation targets should not require compiler. Unfortunately, vdso_install # is an exception where build artifacts may be updated. This must be fixed. no-compiler-targets := $(no-dot-config-targets) install dtbs_install \ @@ -1656,10 +1655,6 @@ distclean: mrproper %pkg: include/config/kernel.release FORCE $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.package $@ -PHONY += scripts_package -scripts_package: scripts_basic - $(Q)$(MAKE) $(build)=scripts scripts/list-gitignored - # Brief documentation of the typical targets used # --------------------------------------------------------------------------- diff --git a/scripts/.gitignore b/scripts/.gitignore index feb43045d1b1..6e9ce6720a05 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -3,7 +3,6 @@ /generate_rust_target /insert-sys-cert /kallsyms -/list-gitignored /module.lds /recordmcount /sign-file diff --git a/scripts/Makefile b/scripts/Makefile index e8917975905c..32b6ba722728 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -38,7 +38,7 @@ HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif # The following programs are only built on demand -hostprogs += list-gitignored unifdef +hostprogs += unifdef # The module linker script is preprocessed on demand targets += module.lds diff --git a/scripts/Makefile.package b/scripts/Makefile.package index a0355bdeebff..61f72eb8d9be 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -2,6 +2,7 @@ # Makefile for the different targets used to generate full packages of a kernel include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.lib KERNELPATH := kernel-$(subst -,_,$(KERNELRELEASE)) KBUILD_PKG_ROOTCMD ?="fakeroot -u" @@ -26,54 +27,46 @@ fi ; \ tar -I $(KGZIP) -c $(RCS_TAR_IGNORE) -f $(2).tar.gz \ --transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3) -# .tmp_filelist .tmp_filelist_exclude +# tarball compression # --------------------------------------------------------------------------- -scripts/list-gitignored: FORCE - $(Q)$(MAKE) -f $(srctree)/Makefile scripts_package +%.tar.gz: %.tar + $(call cmd,gzip) -# 1f5d3a6b6532e25a5cdf1f311956b2b03d343a48 removed '*.rej' from .gitignore, -# but it is definitely a generated file. -filechk_filelist = \ - $< --exclude='*.rej' --output=$@_exclude --prefix=./ --rootdir=$(srctree) --stat=- +%.tar.bz2: %.tar + $(call cmd,bzip2) -.tmp_filelist: scripts/list-gitignored FORCE - $(call filechk,filelist) +%.tar.xz: %.tar + $(call cmd,xzmisc) -# tarball -# --------------------------------------------------------------------------- - -quiet_cmd_tar = TAR $@ - cmd_tar = tar -c -f $@ $(tar-compress-opt) $(tar-exclude-opt) \ - --owner=0 --group=0 --sort=name \ - --transform 's:^\.:$*:S' -C $(tar-rootdir) . - -tar-rootdir := $(srctree) +%.tar.zst: %.tar + $(call cmd,zstd) -%.tar: - $(call cmd,tar) - -%.tar.gz: private tar-compress-opt := -I $(KGZIP) -%.tar.gz: - $(call cmd,tar) +# Git +# --------------------------------------------------------------------------- -%.tar.bz2: private tar-compress-opt := -I $(KBZIP2) -%.tar.bz2: - $(call cmd,tar) +filechk_HEAD = git -C $(srctree) rev-parse --verify HEAD 2>/dev/null -%.tar.xz: private tar-compress-opt := -I $(XZ) -%.tar.xz: - $(call cmd,tar) +.tmp_HEAD: check-git FORCE + $(call filechk,HEAD) -%.tar.zst: private tar-compress-opt := -I $(ZSTD) -%.tar.zst: - $(call cmd,tar) +PHONY += check-git +check-git: + @if ! $(srctree)/scripts/check-git; then \ + echo >&2 "error: creating source package requires git repository"; \ + false; \ + fi # Linux source tarball # --------------------------------------------------------------------------- -linux.tar.gz: tar-exclude-opt = --exclude=./$@ --exclude-from=$<_exclude -linux.tar.gz: .tmp_filelist +quiet_cmd_archive_linux = ARCHIVE $@ + cmd_archive_linux = \ + git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ $$(cat $<) + +targets += linux.tar +linux.tar: .tmp_HEAD FORCE + $(call if_changed,archive_linux) # rpm-pkg # --------------------------------------------------------------------------- @@ -148,74 +141,62 @@ snap-pkg: # dir-pkg tar*-pkg - tarball targets # --------------------------------------------------------------------------- -tar-pkg-tarball = linux-$(KERNELRELEASE)-$(ARCH).$(1) -tar-pkg-phony = $(subst .,,$(1))-pkg - tar-install: FORCE $(Q)$(MAKE) -f $(srctree)/Makefile +$(Q)$(srctree)/scripts/package/buildtar $@ +quiet_cmd_tar = TAR $@ + cmd_tar = cd $<; tar cf ../$@ --owner=root --group=root --sort=name * + +linux-$(KERNELRELEASE)-$(ARCH).tar: tar-install + $(call cmd,tar) + PHONY += dir-pkg dir-pkg: tar-install @echo "Kernel tree successfully created in $<" -define tar-pkg-rule -PHONY += $(tar-pkg-phony) -$(tar-pkg-phony): $(tar-pkg-tarball) +PHONY += tar-pkg +tar-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar @: -$(tar-pkg-tarball): private tar-rootdir := tar-install -$(tar-pkg-tarball): tar-install -endef - -$(foreach x, tar tar.gz tar.bz2 tar.xz tar.zst, $(eval $(call tar-pkg-rule,$(x)))) +tar%-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar.% FORCE + @: # perf-tar*-src-pkg - generate a source tarball with perf source # --------------------------------------------------------------------------- -perf-tar-src-pkg-tarball = perf-$(KERNELVERSION).$(1) -perf-tar-src-pkg-phony = perf-$(subst .,,$(1))-src-pkg - -quiet_cmd_stage_perf_src = STAGE $@ - cmd_stage_perf_src = \ - rm -rf $@; \ - mkdir -p $@; \ - tar -c -f - --exclude-from=$<_exclude -C $(srctree) --files-from=$(srctree)/tools/perf/MANIFEST | \ - tar -x -f - -C $@ - -.tmp_perf: .tmp_filelist - $(call cmd,stage_perf_src) - -filechk_perf_head = \ - if test -z "$(git -C $(srctree) rev-parse --show-cdup 2>/dev/null)" && \ - head=$$(git -C $(srctree) rev-parse --verify HEAD 2>/dev/null); then \ - echo $$head; \ - else \ - echo "not a git tree"; \ - fi +.tmp_perf: + $(Q)mkdir .tmp_perf -.tmp_perf/HEAD: .tmp_perf FORCE - $(call filechk,perf_head) +.tmp_perf/HEAD: .tmp_HEAD | .tmp_perf + $(call cmd,copy) quiet_cmd_perf_version_file = GEN $@ cmd_perf_version_file = cd $(srctree)/tools/perf; util/PERF-VERSION-GEN $(dir $(abspath $@)) -# PERF-VERSION-FILE and HEAD are independent, but this avoids updating the +# PERF-VERSION-FILE and .tmp_HEAD are independent, but this avoids updating the # timestamp of PERF-VERSION-FILE. # The best is to fix tools/perf/util/PERF-VERSION-GEN. -.tmp_perf/PERF-VERSION-FILE: .tmp_perf/HEAD $(srctree)/tools/perf/util/PERF-VERSION-GEN +.tmp_perf/PERF-VERSION-FILE: .tmp_HEAD $(srctree)/tools/perf/util/PERF-VERSION-GEN | .tmp_perf $(call cmd,perf_version_file) -define perf-tar-src-pkg-rule -PHONY += $(perf-tar-src-pkg-phony) -$(perf-tar-src-pkg-phony): $(perf-tar-src-pkg-tarball) - @: +quiet_cmd_archive_perf = ARCHIVE $@ + cmd_archive_perf = \ + git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ \ + --add-file=$$(realpath $(word 2, $^)) \ + --add-file=$$(realpath $(word 3, $^)) \ + $$(cat $(word 2, $^))^{tree} $$(cat $<) -$(perf-tar-src-pkg-tarball): private tar-rootdir := .tmp_perf -$(perf-tar-src-pkg-tarball): .tmp_filelist .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE -endef +targets += perf-$(KERNELVERSION).tar +perf-$(KERNELVERSION).tar: tools/perf/MANIFEST .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE FORCE + $(call if_changed,archive_perf) + +PHONY += perf-tar-src-pkg +perf-tar-src-pkg: perf-$(KERNELVERSION).tar + @: -$(foreach x, tar tar.gz tar.bz2 tar.xz tar.zst, $(eval $(call perf-tar-src-pkg-rule,$(x)))) +perf-tar%-src-pkg: perf-$(KERNELVERSION).tar.% FORCE + @: # Help text displayed when executing 'make help' # --------------------------------------------------------------------------- @@ -243,4 +224,13 @@ help: PHONY += FORCE FORCE: +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. + +existing-targets := $(wildcard $(sort $(targets))) + +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) + .PHONY: $(PHONY) diff --git a/scripts/check-git b/scripts/check-git new file mode 100755 index 000000000000..2ca6c5df10dd --- /dev/null +++ b/scripts/check-git @@ -0,0 +1,14 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only +# +# succeed if we are in a git repository + +srctree="$(dirname $0)/.." + +if ! git -C "${srctree}" rev-parse --verify HEAD >/dev/null 2>/dev/null; then + exit 1 +fi + +if ! test -z $(git -C "${srctree}" rev-parse --show-cdup 2>/dev/null); then + exit 1 +fi diff --git a/scripts/list-gitignored.c b/scripts/list-gitignored.c deleted file mode 100644 index f9941f8dcd2b..000000000000 --- a/scripts/list-gitignored.c +++ /dev/null @@ -1,1057 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -// -// Traverse the source tree, parsing all .gitignore files, and print file paths -// that are ignored by git. -// The output is suitable to the --exclude-from option of tar. -// This is useful until the --exclude-vcs-ignores option gets working correctly. -// -// Copyright (C) 2023 Masahiro Yamada -// (a lot of code imported from GIT) - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT -// -//---------------------------(IMPORT FROM GIT BEGIN)--------------------------- - -// Copied from environment.c - -static bool ignore_case; - -// Copied from git-compat-util.h - -/* Sane ctype - no locale, and works with signed chars */ -#undef isascii -#undef isspace -#undef isdigit -#undef isalpha -#undef isalnum -#undef isprint -#undef islower -#undef isupper -#undef tolower -#undef toupper -#undef iscntrl -#undef ispunct -#undef isxdigit - -static const unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 -#define GIT_PATHSPEC_MAGIC 0x20 -#define GIT_CNTRL 0x40 -#define GIT_PUNCT 0x80 -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) -#define isascii(x) (((x) & ~0x7f) == 0) -#define isspace(x) sane_istest(x,GIT_SPACE) -#define isdigit(x) sane_istest(x,GIT_DIGIT) -#define isalpha(x) sane_istest(x,GIT_ALPHA) -#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e) -#define islower(x) sane_iscase(x, 1) -#define isupper(x) sane_iscase(x, 0) -#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) -#define iscntrl(x) (sane_istest(x,GIT_CNTRL)) -#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \ - GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC) -#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1) -#define tolower(x) sane_case((unsigned char)(x), 0x20) -#define toupper(x) sane_case((unsigned char)(x), 0) - -static inline int sane_case(int x, int high) -{ - if (sane_istest(x, GIT_ALPHA)) - x = (x & ~0x20) | high; - return x; -} - -static inline int sane_iscase(int x, int is_lower) -{ - if (!sane_istest(x, GIT_ALPHA)) - return 0; - - if (is_lower) - return (x & 0x20) != 0; - else - return (x & 0x20) == 0; -} - -// Copied from ctype.c - -enum { - S = GIT_SPACE, - A = GIT_ALPHA, - D = GIT_DIGIT, - G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ - R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */ - P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */ - X = GIT_CNTRL, - U = GIT_PUNCT, - Z = GIT_CNTRL | GIT_SPACE -}; - -static const unsigned char sane_ctype[256] = { - X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */ - X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */ - S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ - D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */ - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */ - /* Nothing in the 128.. range */ -}; - -// Copied from hex.c - -static const signed char hexval_table[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 18-1f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 20-27 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 28-2f */ - 0, 1, 2, 3, 4, 5, 6, 7, /* 30-37 */ - 8, 9, -1, -1, -1, -1, -1, -1, /* 38-3f */ - -1, 10, 11, 12, 13, 14, 15, -1, /* 40-47 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 48-4f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 50-57 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 58-5f */ - -1, 10, 11, 12, 13, 14, 15, -1, /* 60-67 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 68-67 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 70-77 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 78-7f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 80-87 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 88-8f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 90-97 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* 98-9f */ - -1, -1, -1, -1, -1, -1, -1, -1, /* a0-a7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* a8-af */ - -1, -1, -1, -1, -1, -1, -1, -1, /* b0-b7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* b8-bf */ - -1, -1, -1, -1, -1, -1, -1, -1, /* c0-c7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* c8-cf */ - -1, -1, -1, -1, -1, -1, -1, -1, /* d0-d7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* d8-df */ - -1, -1, -1, -1, -1, -1, -1, -1, /* e0-e7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* e8-ef */ - -1, -1, -1, -1, -1, -1, -1, -1, /* f0-f7 */ - -1, -1, -1, -1, -1, -1, -1, -1, /* f8-ff */ -}; - -// Copied from wildmatch.h - -#define WM_CASEFOLD 1 -#define WM_PATHNAME 2 - -#define WM_NOMATCH 1 -#define WM_MATCH 0 -#define WM_ABORT_ALL -1 -#define WM_ABORT_TO_STARSTAR -2 - -// Copied from wildmatch.c - -typedef unsigned char uchar; - -// local modification: remove NEGATE_CLASS(2) - -#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \ - && *(class) == *(litmatch) \ - && strncmp((char*)class, litmatch, len) == 0) - -// local modification: simpilify macros -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#define ISGRAPH(c) (isprint(c) && !isspace(c)) -#define ISPRINT(c) isprint(c) -#define ISDIGIT(c) isdigit(c) -#define ISALNUM(c) isalnum(c) -#define ISALPHA(c) isalpha(c) -#define ISCNTRL(c) iscntrl(c) -#define ISLOWER(c) islower(c) -#define ISPUNCT(c) ispunct(c) -#define ISSPACE(c) isspace(c) -#define ISUPPER(c) isupper(c) -#define ISXDIGIT(c) isxdigit(c) - -/* Match pattern "p" against "text" */ -static int dowild(const uchar *p, const uchar *text, unsigned int flags) -{ - uchar p_ch; - const uchar *pattern = p; - - for ( ; (p_ch = *p) != '\0'; text++, p++) { - int matched, match_slash, negated; - uchar t_ch, prev_ch; - if ((t_ch = *text) == '\0' && p_ch != '*') - return WM_ABORT_ALL; - if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) - t_ch = tolower(t_ch); - if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) - p_ch = tolower(p_ch); - switch (p_ch) { - case '\\': - /* Literal match with following character. Note that the test - * in "default" handles the p[1] == '\0' failure case. */ - p_ch = *++p; - /* FALLTHROUGH */ - default: - if (t_ch != p_ch) - return WM_NOMATCH; - continue; - case '?': - /* Match anything but '/'. */ - if ((flags & WM_PATHNAME) && t_ch == '/') - return WM_NOMATCH; - continue; - case '*': - if (*++p == '*') { - const uchar *prev_p = p - 2; - while (*++p == '*') {} - if (!(flags & WM_PATHNAME)) - /* without WM_PATHNAME, '*' == '**' */ - match_slash = 1; - else if ((prev_p < pattern || *prev_p == '/') && - (*p == '\0' || *p == '/' || - (p[0] == '\\' && p[1] == '/'))) { - /* - * Assuming we already match 'foo/' and are at - * , just assume it matches - * nothing and go ahead match the rest of the - * pattern with the remaining string. This - * helps make foo/<*><*>/bar (<> because - * otherwise it breaks C comment syntax) match - * both foo/bar and foo/a/bar. - */ - if (p[0] == '/' && - dowild(p + 1, text, flags) == WM_MATCH) - return WM_MATCH; - match_slash = 1; - } else /* WM_PATHNAME is set */ - match_slash = 0; - } else - /* without WM_PATHNAME, '*' == '**' */ - match_slash = flags & WM_PATHNAME ? 0 : 1; - if (*p == '\0') { - /* Trailing "**" matches everything. Trailing "*" matches - * only if there are no more slash characters. */ - if (!match_slash) { - if (strchr((char *)text, '/')) - return WM_NOMATCH; - } - return WM_MATCH; - } else if (!match_slash && *p == '/') { - /* - * _one_ asterisk followed by a slash - * with WM_PATHNAME matches the next - * directory - */ - const char *slash = strchr((char*)text, '/'); - if (!slash) - return WM_NOMATCH; - text = (const uchar*)slash; - /* the slash is consumed by the top-level for loop */ - break; - } - while (1) { - if (t_ch == '\0') - break; - /* - * Try to advance faster when an asterisk is - * followed by a literal. We know in this case - * that the string before the literal - * must belong to "*". - * If match_slash is false, do not look past - * the first slash as it cannot belong to '*'. - */ - if (!is_glob_special(*p)) { - p_ch = *p; - if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) - p_ch = tolower(p_ch); - while ((t_ch = *text) != '\0' && - (match_slash || t_ch != '/')) { - if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) - t_ch = tolower(t_ch); - if (t_ch == p_ch) - break; - text++; - } - if (t_ch != p_ch) - return WM_NOMATCH; - } - if ((matched = dowild(p, text, flags)) != WM_NOMATCH) { - if (!match_slash || matched != WM_ABORT_TO_STARSTAR) - return matched; - } else if (!match_slash && t_ch == '/') - return WM_ABORT_TO_STARSTAR; - t_ch = *++text; - } - return WM_ABORT_ALL; - case '[': - p_ch = *++p; - if (p_ch == '^') - p_ch = '!'; - /* Assign literal 1/0 because of "matched" comparison. */ - negated = p_ch == '!' ? 1 : 0; - if (negated) { - /* Inverted character class. */ - p_ch = *++p; - } - prev_ch = 0; - matched = 0; - do { - if (!p_ch) - return WM_ABORT_ALL; - if (p_ch == '\\') { - p_ch = *++p; - if (!p_ch) - return WM_ABORT_ALL; - if (t_ch == p_ch) - matched = 1; - } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') { - p_ch = *++p; - if (p_ch == '\\') { - p_ch = *++p; - if (!p_ch) - return WM_ABORT_ALL; - } - if (t_ch <= p_ch && t_ch >= prev_ch) - matched = 1; - else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) { - uchar t_ch_upper = toupper(t_ch); - if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch) - matched = 1; - } - p_ch = 0; /* This makes "prev_ch" get set to 0. */ - } else if (p_ch == '[' && p[1] == ':') { - const uchar *s; - int i; - for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/ - if (!p_ch) - return WM_ABORT_ALL; - i = p - s - 1; - if (i < 0 || p[-1] != ':') { - /* Didn't find ":]", so treat like a normal set. */ - p = s - 2; - p_ch = '['; - if (t_ch == p_ch) - matched = 1; - continue; - } - if (CC_EQ(s,i, "alnum")) { - if (ISALNUM(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "alpha")) { - if (ISALPHA(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "blank")) { - if (ISBLANK(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "cntrl")) { - if (ISCNTRL(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "digit")) { - if (ISDIGIT(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "graph")) { - if (ISGRAPH(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "lower")) { - if (ISLOWER(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "print")) { - if (ISPRINT(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "punct")) { - if (ISPUNCT(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "space")) { - if (ISSPACE(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "upper")) { - if (ISUPPER(t_ch)) - matched = 1; - else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) - matched = 1; - } else if (CC_EQ(s,i, "xdigit")) { - if (ISXDIGIT(t_ch)) - matched = 1; - } else /* malformed [:class:] string */ - return WM_ABORT_ALL; - p_ch = 0; /* This makes "prev_ch" get set to 0. */ - } else if (t_ch == p_ch) - matched = 1; - } while (prev_ch = p_ch, (p_ch = *++p) != ']'); - if (matched == negated || - ((flags & WM_PATHNAME) && t_ch == '/')) - return WM_NOMATCH; - continue; - } - } - - return *text ? WM_NOMATCH : WM_MATCH; -} - -/* Match the "pattern" against the "text" string. */ -static int wildmatch(const char *pattern, const char *text, unsigned int flags) -{ - // local modification: move WM_CASEFOLD here - if (ignore_case) - flags |= WM_CASEFOLD; - - return dowild((const uchar*)pattern, (const uchar*)text, flags); -} - -// Copied from dir.h - -#define PATTERN_FLAG_NODIR 1 -#define PATTERN_FLAG_ENDSWITH 4 -#define PATTERN_FLAG_MUSTBEDIR 8 -#define PATTERN_FLAG_NEGATIVE 16 - -// Copied from dir.c - -static int fspathncmp(const char *a, const char *b, size_t count) -{ - return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); -} - -static int simple_length(const char *match) -{ - int len = -1; - - for (;;) { - unsigned char c = *match++; - len++; - if (c == '\0' || is_glob_special(c)) - return len; - } -} - -static int no_wildcard(const char *string) -{ - return string[simple_length(string)] == '\0'; -} - -static void parse_path_pattern(const char **pattern, - int *patternlen, - unsigned *flags, - int *nowildcardlen) -{ - const char *p = *pattern; - size_t i, len; - - *flags = 0; - if (*p == '!') { - *flags |= PATTERN_FLAG_NEGATIVE; - p++; - } - len = strlen(p); - if (len && p[len - 1] == '/') { - len--; - *flags |= PATTERN_FLAG_MUSTBEDIR; - } - for (i = 0; i < len; i++) { - if (p[i] == '/') - break; - } - if (i == len) - *flags |= PATTERN_FLAG_NODIR; - *nowildcardlen = simple_length(p); - /* - * we should have excluded the trailing slash from 'p' too, - * but that's one more allocation. Instead just make sure - * nowildcardlen does not exceed real patternlen - */ - if (*nowildcardlen > len) - *nowildcardlen = len; - if (*p == '*' && no_wildcard(p + 1)) - *flags |= PATTERN_FLAG_ENDSWITH; - *pattern = p; - *patternlen = len; -} - -static void trim_trailing_spaces(char *buf) -{ - char *p, *last_space = NULL; - - for (p = buf; *p; p++) - switch (*p) { - case ' ': - if (!last_space) - last_space = p; - break; - case '\\': - p++; - if (!*p) - return; - /* fallthrough */ - default: - last_space = NULL; - } - - if (last_space) - *last_space = '\0'; -} - -static int match_basename(const char *basename, int basenamelen, - const char *pattern, int prefix, int patternlen, - unsigned flags) -{ - if (prefix == patternlen) { - if (patternlen == basenamelen && - !fspathncmp(pattern, basename, basenamelen)) - return 1; - } else if (flags & PATTERN_FLAG_ENDSWITH) { - /* "*literal" matching against "fooliteral" */ - if (patternlen - 1 <= basenamelen && - !fspathncmp(pattern + 1, - basename + basenamelen - (patternlen - 1), - patternlen - 1)) - return 1; - } else { - // local modification: call wildmatch() directly - if (!wildmatch(pattern, basename, flags)) - return 1; - } - return 0; -} - -static int match_pathname(const char *pathname, int pathlen, - const char *base, int baselen, - const char *pattern, int prefix, int patternlen) -{ - // local modification: remove local variables - - /* - * match with FNM_PATHNAME; the pattern has base implicitly - * in front of it. - */ - if (*pattern == '/') { - pattern++; - patternlen--; - prefix--; - } - - /* - * baselen does not count the trailing slash. base[] may or - * may not end with a trailing slash though. - */ - if (pathlen < baselen + 1 || - (baselen && pathname[baselen] != '/') || - fspathncmp(pathname, base, baselen)) - return 0; - - // local modification: simplified because always baselen > 0 - pathname += baselen + 1; - pathlen -= baselen + 1; - - if (prefix) { - /* - * if the non-wildcard part is longer than the - * remaining pathname, surely it cannot match. - */ - if (prefix > pathlen) - return 0; - - if (fspathncmp(pattern, pathname, prefix)) - return 0; - pattern += prefix; - patternlen -= prefix; - pathname += prefix; - pathlen -= prefix; - - /* - * If the whole pattern did not have a wildcard, - * then our prefix match is all we need; we - * do not need to call fnmatch at all. - */ - if (!patternlen && !pathlen) - return 1; - } - - // local modification: call wildmatch() directly - return !wildmatch(pattern, pathname, WM_PATHNAME); -} - -// Copied from git/utf8.c - -static const char utf8_bom[] = "\357\273\277"; - -//----------------------------(IMPORT FROM GIT END)---------------------------- - -struct pattern { - unsigned int flags; - int nowildcardlen; - int patternlen; - int dirlen; - char pattern[]; -}; - -static struct pattern **pattern_list; -static int nr_patterns, alloced_patterns; - -// Remember the number of patterns at each directory level -static int *nr_patterns_at; -// Track the current/max directory level; -static int depth, max_depth; -static bool debug_on; -static FILE *out_fp, *stat_fp; -static char *prefix = ""; -static char *progname; - -static void __attribute__((noreturn)) perror_exit(const char *s) -{ - perror(s); - - exit(EXIT_FAILURE); -} - -static void __attribute__((noreturn)) error_exit(const char *fmt, ...) -{ - va_list args; - - fprintf(stderr, "%s: error: ", progname); - - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); - - exit(EXIT_FAILURE); -} - -static void debug(const char *fmt, ...) -{ - va_list args; - int i; - - if (!debug_on) - return; - - fprintf(stderr, "[DEBUG] "); - - for (i = 0; i < depth * 2; i++) - fputc(' ', stderr); - - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); -} - -static void *xrealloc(void *ptr, size_t size) -{ - ptr = realloc(ptr, size); - if (!ptr) - perror_exit(progname); - - return ptr; -} - -static void *xmalloc(size_t size) -{ - return xrealloc(NULL, size); -} - -// similar to last_matching_pattern_from_list() in GIT -static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir) -{ - int i; - - // Search in the reverse order because the last matching pattern wins. - for (i = nr_patterns - 1; i >= 0; i--) { - struct pattern *p = pattern_list[i]; - unsigned int flags = p->flags; - const char *gitignore_dir = p->pattern + p->patternlen + 1; - bool ignored; - - if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir) - continue; - - if (flags & PATTERN_FLAG_NODIR) { - if (!match_basename(path + dirlen + 1, - pathlen - dirlen - 1, - p->pattern, - p->nowildcardlen, - p->patternlen, - p->flags)) - continue; - } else { - if (!match_pathname(path, pathlen, - gitignore_dir, p->dirlen, - p->pattern, - p->nowildcardlen, - p->patternlen)) - continue; - } - - debug("%s: matches %s%s%s (%s/.gitignore)\n", path, - flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, - flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "", - gitignore_dir); - - ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0; - if (ignored) - debug("Ignore: %s\n", path); - - return ignored; - } - - debug("%s: no match\n", path); - - return false; -} - -static void add_pattern(const char *string, const char *dir, int dirlen) -{ - struct pattern *p; - int patternlen, nowildcardlen; - unsigned int flags; - - parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen); - - if (patternlen == 0) - return; - - p = xmalloc(sizeof(*p) + patternlen + dirlen + 2); - - memcpy(p->pattern, string, patternlen); - p->pattern[patternlen] = 0; - memcpy(p->pattern + patternlen + 1, dir, dirlen); - p->pattern[patternlen + 1 + dirlen] = 0; - - p->patternlen = patternlen; - p->nowildcardlen = nowildcardlen; - p->dirlen = dirlen; - p->flags = flags; - - debug("Add pattern: %s%s%s\n", - flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, - flags & PATTERN_FLAG_MUSTBEDIR ? "/" : ""); - - if (nr_patterns >= alloced_patterns) { - alloced_patterns += 128; - pattern_list = xrealloc(pattern_list, - sizeof(*pattern_list) * alloced_patterns); - } - - pattern_list[nr_patterns++] = p; -} - -// similar to add_patterns_from_buffer() in GIT -static void add_patterns_from_gitignore(const char *dir, int dirlen) -{ - struct stat st; - char path[PATH_MAX], *buf, *entry; - size_t size; - int fd, pathlen, i; - - pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir); - if (pathlen >= sizeof(path)) - error_exit("%s: too long path was truncated\n", path); - - fd = open(path, O_RDONLY | O_NOFOLLOW); - if (fd < 0) { - if (errno != ENOENT) - return perror_exit(path); - return; - } - - if (fstat(fd, &st) < 0) - perror_exit(path); - - size = st.st_size; - - buf = xmalloc(size + 1); - if (read(fd, buf, st.st_size) != st.st_size) - perror_exit(path); - - buf[st.st_size] = '\n'; - if (close(fd)) - perror_exit(path); - - debug("Parse %s\n", path); - - entry = buf; - - // skip utf8 bom - if (!strncmp(entry, utf8_bom, strlen(utf8_bom))) - entry += strlen(utf8_bom); - - for (i = entry - buf; i < size; i++) { - if (buf[i] == '\n') { - if (entry != buf + i && entry[0] != '#') { - buf[i - (i && buf[i-1] == '\r')] = 0; - trim_trailing_spaces(entry); - add_pattern(entry, dir, dirlen); - } - entry = buf + i + 1; - } - } - - free(buf); -} - -// Save the current number of patterns and increment the depth -static void increment_depth(void) -{ - if (depth >= max_depth) { - max_depth += 1; - nr_patterns_at = xrealloc(nr_patterns_at, - sizeof(*nr_patterns_at) * max_depth); - } - - nr_patterns_at[depth] = nr_patterns; - depth++; -} - -// Decrement the depth, and free up the patterns of this directory level. -static void decrement_depth(void) -{ - depth--; - assert(depth >= 0); - - while (nr_patterns > nr_patterns_at[depth]) - free(pattern_list[--nr_patterns]); -} - -static void print_path(const char *path) -{ - // The path always starts with "./" - assert(strlen(path) >= 2); - - // Replace the root directory with a preferred prefix. - // This is useful for the tar command. - fprintf(out_fp, "%s%s\n", prefix, path + 2); -} - -static void print_stat(const char *path, struct stat *st) -{ - if (!stat_fp) - return; - - if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode)) - return; - - assert(strlen(path) >= 2); - - fprintf(stat_fp, "%c %9ld %10ld %s\n", - S_ISLNK(st->st_mode) ? 'l' : '-', - st->st_size, st->st_mtim.tv_sec, path + 2); -} - -// Traverse the entire directory tree, parsing .gitignore files. -// Print file paths that are not tracked by git. -// -// Return true if all files under the directory are ignored, false otherwise. -static bool traverse_directory(const char *dir, int dirlen) -{ - bool all_ignored = true; - DIR *dirp; - - debug("Enter[%d]: %s\n", depth, dir); - increment_depth(); - - add_patterns_from_gitignore(dir, dirlen); - - dirp = opendir(dir); - if (!dirp) - perror_exit(dir); - - while (1) { - struct dirent *d; - struct stat st; - char path[PATH_MAX]; - int pathlen; - bool ignored; - - errno = 0; - d = readdir(dirp); - if (!d) { - if (errno) - perror_exit(dir); - break; - } - - if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) - continue; - - pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name); - if (pathlen >= sizeof(path)) - error_exit("%s: too long path was truncated\n", path); - - if (lstat(path, &st) < 0) - perror_exit(path); - - if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) || - is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) { - ignored = true; - } else { - if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) - // If all the files in a directory are ignored, - // let's ignore that directory as well. This - // will avoid empty directories in the tarball. - ignored = traverse_directory(path, pathlen); - else - ignored = false; - } - - if (ignored) { - print_path(path); - } else { - print_stat(path, &st); - all_ignored = false; - } - } - - if (closedir(dirp)) - perror_exit(dir); - - decrement_depth(); - debug("Leave[%d]: %s\n", depth, dir); - - return all_ignored; -} - -static void usage(void) -{ - fprintf(stderr, - "usage: %s [options]\n" - "\n" - "Show files that are ignored by git\n" - "\n" - "options:\n" - " -d, --debug print debug messages to stderr\n" - " -e, --exclude PATTERN add the given exclude pattern\n" - " -h, --help show this help message and exit\n" - " -i, --ignore-case Ignore case differences between the patterns and the files\n" - " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n" - " -p, --prefix PREFIX prefix added to each path (default: empty string)\n" - " -r, --rootdir DIR root of the source tree (default: current working directory)\n" - " -s, --stat FILE output the file stat of non-ignored files to a file\n", - progname); -} - -static void open_output(const char *pathname, FILE **fp) -{ - if (strcmp(pathname, "-")) { - *fp = fopen(pathname, "w"); - if (!*fp) - perror_exit(pathname); - } else { - *fp = stdout; - } -} - -static void close_output(const char *pathname, FILE *fp) -{ - fflush(fp); - - if (ferror(fp)) - error_exit("not all data was written to the output\n"); - - if (fclose(fp)) - perror_exit(pathname); -} - -int main(int argc, char *argv[]) -{ - const char *output = "-"; - const char *rootdir = "."; - const char *stat = NULL; - - progname = strrchr(argv[0], '/'); - if (progname) - progname++; - else - progname = argv[0]; - - while (1) { - static struct option long_options[] = { - {"debug", no_argument, NULL, 'd'}, - {"help", no_argument, NULL, 'h'}, - {"ignore-case", no_argument, NULL, 'i'}, - {"output", required_argument, NULL, 'o'}, - {"prefix", required_argument, NULL, 'p'}, - {"rootdir", required_argument, NULL, 'r'}, - {"stat", required_argument, NULL, 's'}, - {"exclude", required_argument, NULL, 'x'}, - {}, - }; - - int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL); - - if (c == -1) - break; - - switch (c) { - case 'd': - debug_on = true; - break; - case 'h': - usage(); - exit(0); - case 'i': - ignore_case = true; - break; - case 'o': - output = optarg; - break; - case 'p': - prefix = optarg; - break; - case 'r': - rootdir = optarg; - break; - case 's': - stat = optarg; - break; - case 'x': - add_pattern(optarg, ".", strlen(".")); - break; - case '?': - usage(); - /* fallthrough */ - default: - exit(EXIT_FAILURE); - } - } - - open_output(output, &out_fp); - if (stat && stat[0]) - open_output(stat, &stat_fp); - - if (chdir(rootdir)) - perror_exit(rootdir); - - add_pattern(".git/", ".", strlen(".")); - - if (traverse_directory(".", strlen("."))) - print_path("./"); - - assert(depth == 0); - - while (nr_patterns > 0) - free(pattern_list[--nr_patterns]); - free(pattern_list); - free(nr_patterns_at); - - close_output(output, out_fp); - if (stat_fp) - close_output(stat, stat_fp); - - return 0; -} diff --git a/scripts/package/gen-diff-patch b/scripts/package/gen-diff-patch new file mode 100755 index 000000000000..f842ab50a780 --- /dev/null +++ b/scripts/package/gen-diff-patch @@ -0,0 +1,44 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +diff_patch="${1}" +untracked_patch="${2}" +srctree=$(dirname $0)/../.. + +rm -f ${diff_patch} ${untracked_patch} + +if ! ${srctree}/scripts/check-git; then + exit +fi + +mkdir -p "$(dirname ${diff_patch})" "$(dirname ${untracked_patch})" + +git -C "${srctree}" diff HEAD > "${diff_patch}" + +if [ ! -s "${diff_patch}" ]; then + rm -f "${diff_patch}" + exit +fi + +git -C ${srctree} status --porcelain --untracked-files=all | +while read stat path +do + if [ "${stat}" = '??' ]; then + + if ! diff -u /dev/null "${srctree}/${path}" > .tmp_diff && + ! head -n1 .tmp_diff | grep -q "Binary files"; then + { + echo "--- /dev/null" + echo "+++ linux/$path" + cat .tmp_diff | tail -n +3 + } >> ${untracked_patch} + fi + fi +done + +rm -f .tmp_diff + +if [ ! -s "${diff_patch}" ]; then + rm -f "${diff_patch}" + exit +fi diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index e80a661a79ee..e20a2b5be9eb 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -91,7 +91,7 @@ version=$KERNELRELEASE if [ -n "$KDEB_PKGVERSION" ]; then packageversion=$KDEB_PKGVERSION else - packageversion=$version-$($srctree/init/build-version) + packageversion=$(${srctree}/scripts/setlocalversion --no-local ${srctree})-$($srctree/init/build-version) fi sourcename=${KDEB_SOURCENAME:-linux-upstream} @@ -152,6 +152,14 @@ mkdir -p debian/patches } > debian/patches/config echo config > debian/patches/series +$(dirname $0)/gen-diff-patch debian/patches/diff.patch debian/patches/untracked.patch +if [ -f debian/patches/diff.patch ]; then + echo diff.patch >> debian/patches/series +fi +if [ -f debian/patches/untracked.patch ]; then + echo untracked.patch >> debian/patches/series +fi + echo $debarch > debian/arch extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)" extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)" diff --git a/scripts/package/mkspec b/scripts/package/mkspec index 5f007137f5a0..b7d1dc28a5d6 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -19,6 +19,8 @@ else mkdir -p rpmbuild/SOURCES cp linux.tar.gz rpmbuild/SOURCES cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config + $(dirname $0)/gen-diff-patch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch + touch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch fi if grep -q CONFIG_MODULES=y include/config/auto.conf; then @@ -53,6 +55,8 @@ sed -e '/^DEL/d' -e 's/^\t*//' <&2 + echo "Usage: $0 [--no-local] [srctree]" >&2 exit 1 } +no_local=false +if test "$1" = "--no-local"; then + no_local=true + shift +fi + srctree=. if test $# -gt 0; then srctree=$1 @@ -26,14 +32,22 @@ fi scm_version() { - local short + local short=false + local no_dirty=false local tag - short=false + + while [ $# -gt 0 ]; + do + case "$1" in + --short) + short=true;; + --no-dirty) + no_dirty=true;; + esac + shift + done cd "$srctree" - if test "$1" = "--short"; then - short=true - fi if test -n "$(git rev-parse --show-cdup 2>/dev/null)"; then return @@ -75,6 +89,10 @@ scm_version() printf '%s%s' -g "$(echo $head | cut -c1-12)" fi + if ${no_dirty}; then + return + fi + # Check for uncommitted changes. # This script must avoid any write attempt to the source tree, which # might be read-only. @@ -110,11 +128,6 @@ collect_files() echo "$res" } -if ! test -e include/config/auto.conf; then - echo "Error: kernelrelease not valid - run 'make prepare' to update it" >&2 - exit 1 -fi - if [ -z "${KERNELVERSION}" ]; then echo "KERNELVERSION is not set" >&2 exit 1 @@ -126,6 +139,16 @@ if test ! "$srctree" -ef .; then file_localversion="${file_localversion}$(collect_files "$srctree"/localversion*)" fi +if ${no_local}; then + echo "${KERNELVERSION}$(scm_version --no-dirty)" + exit 0 +fi + +if ! test -e include/config/auto.conf; then + echo "Error: kernelrelease not valid - run 'make prepare' to update it" >&2 + exit 1 +fi + # version string from CONFIG_LOCALVERSION config_localversion=$(sed -n 's/^CONFIG_LOCALVERSION=\(.*\)$/\1/p' include/config/auto.conf) -- cgit From c7df4813b149362248d6ef7be41a311e27bf75fe Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Wed, 8 Mar 2023 18:40:13 +0100 Subject: xsk: Add missing overflow check in xdp_umem_reg The number of chunks can overflow u32. Make sure to return -EINVAL on overflow. Also remove a redundant u32 cast assigning umem->npgs. Fixes: bbff2f321a86 ("xsk: new descriptor addressing scheme") Signed-off-by: Kal Conley Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20230308174013.1114745-1-kal.conley@dectris.com --- net/xdp/xdp_umem.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 4681e8e8ad94..02207e852d79 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -150,10 +150,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { - u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; - u64 npgs, addr = mr->addr, size = mr->len; - unsigned int chunks, chunks_rem; + u32 chunk_size = mr->chunk_size, headroom = mr->headroom; + u64 addr = mr->addr, size = mr->len; + u32 chunks_rem, npgs_rem; + u64 chunks, npgs; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -188,8 +189,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (npgs > U32_MAX) return -EINVAL; - chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); - if (chunks == 0) + chunks = div_u64_rem(size, chunk_size, &chunks_rem); + if (!chunks || chunks > U32_MAX) return -EINVAL; if (!unaligned_chunks && chunks_rem) @@ -202,7 +203,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->headroom = headroom; umem->chunk_size = chunk_size; umem->chunks = chunks; - umem->npgs = (u32)npgs; + umem->npgs = npgs; umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; -- cgit From 203873a535d627c668f293be0cb73e26c30f9cc7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 16 Mar 2023 11:38:19 +0100 Subject: fbdev: stifb: Provide valid pixelclock and add fb_check_var() checks Find a valid modeline depending on the machine graphic card configuration and add the fb_check_var() function to validate Xorg provided graphics settings. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- drivers/video/fbdev/stifb.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 3feb6e40d56d..ef8a4c5fc687 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -921,6 +921,28 @@ SETUP_HCRX(struct stifb_info *fb) /* ------------------- driver specific functions --------------------------- */ +static int +stifb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct stifb_info *fb = container_of(info, struct stifb_info, info); + + if (var->xres != fb->info.var.xres || + var->yres != fb->info.var.yres || + var->bits_per_pixel != fb->info.var.bits_per_pixel) + return -EINVAL; + + var->xres_virtual = var->xres; + var->yres_virtual = var->yres; + var->xoffset = 0; + var->yoffset = 0; + var->grayscale = fb->info.var.grayscale; + var->red.length = fb->info.var.red.length; + var->green.length = fb->info.var.green.length; + var->blue.length = fb->info.var.blue.length; + + return 0; +} + static int stifb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, u_int transp, struct fb_info *info) @@ -1145,6 +1167,7 @@ stifb_init_display(struct stifb_info *fb) static const struct fb_ops stifb_ops = { .owner = THIS_MODULE, + .fb_check_var = stifb_check_var, .fb_setcolreg = stifb_setcolreg, .fb_blank = stifb_blank, .fb_fillrect = stifb_fillrect, @@ -1164,6 +1187,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) struct stifb_info *fb; struct fb_info *info; unsigned long sti_rom_address; + char modestr[32]; char *dev_name; int bpp, xres, yres; @@ -1342,6 +1366,9 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT; info->pseudo_palette = &fb->pseudo_palette; + scnprintf(modestr, sizeof(modestr), "%dx%d-%d", xres, yres, bpp); + fb_find_mode(&info->var, info, modestr, NULL, 0, NULL, bpp); + /* This has to be done !!! */ if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0)) goto out_err1; -- cgit From 92e2a00f2987483e1f9253625828622edd442e61 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Wed, 15 Mar 2023 07:18:31 +0000 Subject: fbdev: nvidia: Fix potential divide by zero variable var->pixclock can be set by user. In case it equals to zero, divide by zero would occur in nvidiafb_set_par. Similar crashes have happened in other fbdev drivers. There is no check and modification on var->pixclock along the call chain to nvidia_check_var and nvidiafb_set_par. We believe it could also be triggered in driver nvidia from user site. Signed-off-by: Wei Chen Signed-off-by: Helge Deller --- drivers/video/fbdev/nvidia/nvidia.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c index e60a276b4855..ea4ba3dfb96b 100644 --- a/drivers/video/fbdev/nvidia/nvidia.c +++ b/drivers/video/fbdev/nvidia/nvidia.c @@ -764,6 +764,8 @@ static int nvidiafb_check_var(struct fb_var_screeninfo *var, int pitch, err = 0; NVTRACE_ENTER(); + if (!var->pixclock) + return -EINVAL; var->transp.offset = 0; var->transp.length = 0; -- cgit From d823685486a3446d061fed7c7d2f80af984f119a Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Wed, 15 Mar 2023 08:33:47 +0000 Subject: fbdev: intelfb: Fix potential divide by zero Variable var->pixclock is controlled by user and can be assigned to zero. Without proper check, divide by zero would occur in intelfbhw_validate_mode and intelfbhw_mode_to_hw. Error out if var->pixclock is zero. Signed-off-by: Wei Chen Signed-off-by: Helge Deller --- drivers/video/fbdev/intelfb/intelfbdrv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c index 0a9e5067b201..a81095b2b1ea 100644 --- a/drivers/video/fbdev/intelfb/intelfbdrv.c +++ b/drivers/video/fbdev/intelfb/intelfbdrv.c @@ -1222,6 +1222,9 @@ static int intelfb_check_var(struct fb_var_screeninfo *var, dinfo = GET_DINFO(info); + if (!var->pixclock) + return -EINVAL; + /* update the pitch */ if (intelfbhw_validate_mode(dinfo, var) != 0) return -EINVAL; -- cgit From 61ac4b86a4c047c20d5cb423ddd87496f14d9868 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Wed, 15 Mar 2023 09:05:18 +0000 Subject: fbdev: lxfb: Fix potential divide by zero var->pixclock can be assigned to zero by user. Without proper check, divide by zero would occur in lx_set_clock. Error out if var->pixclock is zero. Signed-off-by: Wei Chen Signed-off-by: Helge Deller --- drivers/video/fbdev/geode/lxfb_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/geode/lxfb_core.c b/drivers/video/fbdev/geode/lxfb_core.c index 8130e9eee2b4..556d8b1a9e06 100644 --- a/drivers/video/fbdev/geode/lxfb_core.c +++ b/drivers/video/fbdev/geode/lxfb_core.c @@ -235,6 +235,9 @@ static void get_modedb(struct fb_videomode **modedb, unsigned int *size) static int lxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { + if (!var->pixclock) + return -EINVAL; + if (var->xres > 1920 || var->yres > 1440) return -EINVAL; -- cgit From 44a3b36b42acfc433aaaf526191dd12fbb919fdb Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Wed, 15 Mar 2023 09:22:54 +0000 Subject: fbdev: au1200fb: Fix potential divide by zero var->pixclock can be assigned to zero by user. Without proper check, divide by zero would occur when invoking macro PICOS2KHZ in au1200fb_fb_check_var. Error out if var->pixclock is zero. Signed-off-by: Wei Chen Signed-off-by: Helge Deller --- drivers/video/fbdev/au1200fb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index 81c315454428..b6b22fa4a8a0 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -1040,6 +1040,9 @@ static int au1200fb_fb_check_var(struct fb_var_screeninfo *var, u32 pixclock; int screen_size, plane; + if (!var->pixclock) + return -EINVAL; + plane = fbdev->plane; /* Make sure that the mode respect all LCD controller and -- cgit From 29413f05fe34e8824551b91f660fde781249417d Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:29 -0600 Subject: fbdev: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Signed-off-by: Helge Deller --- drivers/video/fbdev/amba-clcd.c | 2 +- drivers/video/fbdev/bw2.c | 2 +- drivers/video/fbdev/cg3.c | 2 +- drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index f65c96d1394d..e45338227be6 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -854,7 +854,7 @@ static struct clcd_board *clcdfb_of_get_board(struct amba_device *dev) board->caps = CLCD_CAP_ALL; board->check = clcdfb_check; board->decode = clcdfb_decode; - if (of_find_property(node, "memory-region", NULL)) { + if (of_property_present(node, "memory-region")) { board->setup = clcdfb_of_vram_setup; board->mmap = clcdfb_of_vram_mmap; board->remove = clcdfb_of_vram_remove; diff --git a/drivers/video/fbdev/bw2.c b/drivers/video/fbdev/bw2.c index 6403ae07970d..9cbadcd18b25 100644 --- a/drivers/video/fbdev/bw2.c +++ b/drivers/video/fbdev/bw2.c @@ -306,7 +306,7 @@ static int bw2_probe(struct platform_device *op) if (!par->regs) goto out_release_fb; - if (!of_find_property(dp, "width", NULL)) { + if (!of_property_present(dp, "width")) { err = bw2_do_default_mode(par, info, &linebytes); if (err) goto out_unmap_regs; diff --git a/drivers/video/fbdev/cg3.c b/drivers/video/fbdev/cg3.c index bdcc3f6ab666..3a37fff4df36 100644 --- a/drivers/video/fbdev/cg3.c +++ b/drivers/video/fbdev/cg3.c @@ -393,7 +393,7 @@ static int cg3_probe(struct platform_device *op) cg3_blank(FB_BLANK_UNBLANK, info); - if (!of_find_property(dp, "width", NULL)) { + if (!of_property_present(dp, "width")) { err = cg3_do_default_mode(par); if (err) goto out_unmap_screen; diff --git a/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c b/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c index 0ae0cab252d3..09f719af0d0c 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c @@ -192,7 +192,7 @@ static int __init omapdss_boot_init(void) omapdss_walk_device(dss, true); for_each_available_child_of_node(dss, child) { - if (!of_find_property(child, "compatible", NULL)) + if (!of_property_present(child, "compatible")) continue; omapdss_walk_device(child, true); -- cgit From d2acf789088bb562cea342b6a24e646df4d47839 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Mar 2023 15:26:05 +0000 Subject: io_uring/rsrc: fix folio accounting | BUG: Bad page state in process kworker/u8:0 pfn:5c001 | page:00000000bfda61c8 refcount:0 mapcount:0 mapping:0000000000000000 index:0x20001 pfn:0x5c001 | head:0000000011409842 order:9 entire_mapcount:0 nr_pages_mapped:0 pincount:1 | anon flags: 0x3fffc00000b0004(uptodate|head|mappedtodisk|swapbacked|node=0|zone=0|lastcpupid=0xffff) | raw: 03fffc0000000000 fffffc0000700001 ffffffff00700903 0000000100000000 | raw: 0000000000000200 0000000000000000 00000000ffffffff 0000000000000000 | head: 03fffc00000b0004 dead000000000100 dead000000000122 ffff00000a809dc1 | head: 0000000000020000 0000000000000000 00000000ffffffff 0000000000000000 | page dumped because: nonzero pincount | CPU: 3 PID: 9 Comm: kworker/u8:0 Not tainted 6.3.0-rc2-00001-gc6811bf0cd87 #1 | Hardware name: linux,dummy-virt (DT) | Workqueue: events_unbound io_ring_exit_work | Call trace: | dump_backtrace+0x13c/0x208 | show_stack+0x34/0x58 | dump_stack_lvl+0x150/0x1a8 | dump_stack+0x20/0x30 | bad_page+0xec/0x238 | free_tail_pages_check+0x280/0x350 | free_pcp_prepare+0x60c/0x830 | free_unref_page+0x50/0x498 | free_compound_page+0xcc/0x100 | free_transhuge_page+0x1f0/0x2b8 | destroy_large_folio+0x80/0xc8 | __folio_put+0xc4/0xf8 | gup_put_folio+0xd0/0x250 | unpin_user_page+0xcc/0x128 | io_buffer_unmap+0xec/0x2c0 | __io_sqe_buffers_unregister+0xa4/0x1e0 | io_ring_exit_work+0x68c/0x1188 | process_one_work+0x91c/0x1a58 | worker_thread+0x48c/0xe30 | kthread+0x278/0x2f0 | ret_from_fork+0x10/0x20 Mark reports an issue with the recent patches coalescing compound pages while registering them in io_uring. The reason is that we try to drop excessive references with folio_put_refs(), but pages were acquired with pin_user_pages(), which has extra accounting and so should be put down with matching unpin_user_pages() or at least gup_put_folio(). As a fix unpin_user_pages() all but first page instead, and let's figure out a better API after. Fixes: 57bebf807e2abcf8 ("io_uring/rsrc: optimise registered huge pages") Reported-by: Mark Rutland Reviewed-by: Jens Axboe Tested-by: Jens Axboe Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/10efd5507d6d1f05ea0f3c601830e08767e189bd.1678980230.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 09a16d709cb5..e2bac9f89902 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1235,7 +1235,13 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, } } if (folio) { - folio_put_refs(folio, nr_pages - 1); + /* + * The pages are bound to the folio, it doesn't + * actually unpin them but drops all but one reference, + * which is usually put down by io_buffer_unmap(). + * Note, needs a better helper. + */ + unpin_user_pages(&pages[1], nr_pages - 1); nr_pages = 1; } } -- cgit From 8f0d196e4dc137470bbd5de98278d941c8002fcb Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 16 Mar 2023 12:16:30 +0100 Subject: block: remove obsolete config BLOCK_COMPAT Before commit bdc1ddad3e5f ("compat_ioctl: block: move blkdev_compat_ioctl() into ioctl.c"), the config BLOCK_COMPAT was used to include compat_ioctl.c into the kernel build. With this commit, the code is moved into ioctl.c and included with the config COMPAT. So, since then, the config BLOCK_COMPAT has no effect and any further purpose. Remove this obsolete config BLOCK_COMPAT. Signed-off-by: Lukas Bulwahn Reviewed-by: Christoph Hellwig Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230316111630.4897-1-lukas.bulwahn@gmail.com Signed-off-by: Jens Axboe --- block/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/Kconfig b/block/Kconfig index 5d9d9c84d516..941b2dca70db 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -204,9 +204,6 @@ config BLK_INLINE_ENCRYPTION_FALLBACK source "block/partitions/Kconfig" -config BLOCK_COMPAT - def_bool COMPAT - config BLK_MQ_PCI def_bool PCI -- cgit From 32d57f667f871bc5a8babbe27ea4c5e668ee0ea8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 1 Mar 2023 12:59:07 +0100 Subject: iavf: fix inverted Rx hash condition leading to disabled hash Condition, which checks whether the netdev has hashing enabled is inverted. Basically, the tagged commit effectively disabled passing flow hash from descriptor to skb, unless user *disables* it via Ethtool. Commit a876c3ba59a6 ("i40e/i40evf: properly report Rx packet hash") fixed this problem, but only for i40e. Invert the condition now in iavf and unblock passing hash to skbs again. Fixes: 857942fd1aa1 ("i40e: Fix Rx hash reported to the stack by our driver") Reviewed-by: Larysa Zaremba Reviewed-by: Michal Kubiak Signed-off-by: Alexander Lobakin Tested-by: Rafal Romanowski Reviewed-by: Leon Romanovsky Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 18b6a702a1d6..e989feda133c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1096,7 +1096,7 @@ static inline void iavf_rx_hash(struct iavf_ring *ring, cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH << IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT); - if (ring->netdev->features & NETIF_F_RXHASH) + if (!(ring->netdev->features & NETIF_F_RXHASH)) return; if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) { -- cgit From de58647b4301fe181f9c38e8b46f7021584ae427 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 1 Mar 2023 12:59:08 +0100 Subject: iavf: fix non-tunneled IPv6 UDP packet type and hashing Currently, IAVF's decode_rx_desc_ptype() correctly reports payload type of L4 for IPv4 UDP packets and IPv{4,6} TCP, but only L3 for IPv6 UDP. Originally, i40e, ice and iavf were affected. Commit 73df8c9e3e3d ("i40e: Correct UDP packet header for non_tunnel-ipv6") fixed that in i40e, then commit 638a0c8c8861 ("ice: fix incorrect payload indicator on PTYPE") fixed that for ice. IPv6 UDP is L4 obviously. Fix it and make iavf report correct L4 hash type for such packets, so that the stack won't calculate it on CPU when needs it. Fixes: 206812b5fccb ("i40e/i40evf: i40e implementation for skb_set_hash") Reviewed-by: Larysa Zaremba Reviewed-by: Michal Kubiak Signed-off-by: Alexander Lobakin Tested-by: Rafal Romanowski Reviewed-by: Leon Romanovsky Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 16c490965b61..dd11dbbd5551 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -661,7 +661,7 @@ struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = { /* Non Tunneled IPv6 */ IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3), + IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), IAVF_PTT_UNUSED_ENTRY(91), IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), -- cgit From 964290ff32d132bf971d45b29f7de39756dab7c8 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Wed, 15 Mar 2023 13:59:25 -0600 Subject: iavf: do not track VLAN 0 filters When an interface with the maximum number of VLAN filters is brought up, a spurious error is logged: [257.483082] 8021q: adding VLAN 0 to HW filter on device enp0s3 [257.483094] iavf 0000:00:03.0 enp0s3: Max allowed VLAN filters 8. Remove existing VLANs or disable filtering via Ethtool if supported. The VF driver complains that it cannot add the VLAN 0 filter. On the other hand, the PF driver always adds VLAN 0 filter on VF initialization. The VF does not need to ask the PF for that filter at all. Fix the error by not tracking VLAN 0 filters altogether. With that, the check added by commit 0e710a3ffd0c ("iavf: Fix VF driver counting VLAN 0 filters") in iavf_virtchnl.c is useless and might be confusing if left as it suggests that we track VLAN 0. Fixes: 0e710a3ffd0c ("iavf: Fix VF driver counting VLAN 0 filters") Signed-off-by: Ahmed Zaki Reviewed-by: Michal Kubiak Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 8 ++++++++ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 2 -- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3273aeb8fa67..327cd9b1af2c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -893,6 +893,10 @@ static int iavf_vlan_rx_add_vid(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); + /* Do not track VLAN 0 filter, always added by the PF on VF init */ + if (!vid) + return 0; + if (!VLAN_FILTERING_ALLOWED(adapter)) return -EIO; @@ -919,6 +923,10 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); + /* We do not track VLAN 0 filter */ + if (!vid) + return 0; + iavf_del_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))); if (proto == cpu_to_be16(ETH_P_8021Q)) clear_bit(vid, adapter->vsi.active_cvlans); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 6d23338604bb..4e17d006c52d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -2446,8 +2446,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list_for_each_entry(f, &adapter->vlan_filter_list, list) { if (f->is_new_vlan) { f->is_new_vlan = false; - if (!f->vlan.vid) - continue; if (f->vlan.tpid == ETH_P_8021Q) set_bit(f->vlan.vid, adapter->vsi.active_cvlans); -- cgit From 65f69851e44d71248b952a687e44759a7abb5016 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 7 Mar 2023 23:29:17 +0800 Subject: igb: revert rtnl_lock() that causes deadlock The commit 6faee3d4ee8b ("igb: Add lock to avoid data race") adds rtnl_lock to eliminate a false data race shown below (FREE from device detaching) | (USE from netdev core) igb_remove | igb_ndo_get_vf_config igb_disable_sriov | vf >= adapter->vfs_allocated_count? kfree(adapter->vf_data) | adapter->vfs_allocated_count = 0 | | memcpy(... adapter->vf_data[vf] The above race will never happen and the extra rtnl_lock causes deadlock below [ 141.420169] [ 141.420672] __schedule+0x2dd/0x840 [ 141.421427] schedule+0x50/0xc0 [ 141.422041] schedule_preempt_disabled+0x11/0x20 [ 141.422678] __mutex_lock.isra.13+0x431/0x6b0 [ 141.423324] unregister_netdev+0xe/0x20 [ 141.423578] igbvf_remove+0x45/0xe0 [igbvf] [ 141.423791] pci_device_remove+0x36/0xb0 [ 141.423990] device_release_driver_internal+0xc1/0x160 [ 141.424270] pci_stop_bus_device+0x6d/0x90 [ 141.424507] pci_stop_and_remove_bus_device+0xe/0x20 [ 141.424789] pci_iov_remove_virtfn+0xba/0x120 [ 141.425452] sriov_disable+0x2f/0xf0 [ 141.425679] igb_disable_sriov+0x4e/0x100 [igb] [ 141.426353] igb_remove+0xa0/0x130 [igb] [ 141.426599] pci_device_remove+0x36/0xb0 [ 141.426796] device_release_driver_internal+0xc1/0x160 [ 141.427060] driver_detach+0x44/0x90 [ 141.427253] bus_remove_driver+0x55/0xe0 [ 141.427477] pci_unregister_driver+0x2a/0xa0 [ 141.428296] __x64_sys_delete_module+0x141/0x2b0 [ 141.429126] ? mntput_no_expire+0x4a/0x240 [ 141.429363] ? syscall_trace_enter.isra.19+0x126/0x1a0 [ 141.429653] do_syscall_64+0x5b/0x80 [ 141.429847] ? exit_to_user_mode_prepare+0x14d/0x1c0 [ 141.430109] ? syscall_exit_to_user_mode+0x12/0x30 [ 141.430849] ? do_syscall_64+0x67/0x80 [ 141.431083] ? syscall_exit_to_user_mode_prepare+0x183/0x1b0 [ 141.431770] ? syscall_exit_to_user_mode+0x12/0x30 [ 141.432482] ? do_syscall_64+0x67/0x80 [ 141.432714] ? exc_page_fault+0x64/0x140 [ 141.432911] entry_SYSCALL_64_after_hwframe+0x72/0xdc Since the igb_disable_sriov() will call pci_disable_sriov() before releasing any resources, the netdev core will synchronize the cleanup to avoid any races. This patch removes the useless rtnl_(un)lock to guarantee correctness. CC: stable@vger.kernel.org Fixes: 6faee3d4ee8b ("igb: Add lock to avoid data race") Reported-by: Corinna Vinschen Link: https://lore.kernel.org/intel-wired-lan/ZAcJvkEPqWeJHO2r@calimero.vinschen.de/ Signed-off-by: Lin Ma Tested-by: Corinna Vinschen Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 03bc1e8af575..5532361b0e94 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3863,9 +3863,7 @@ static void igb_remove(struct pci_dev *pdev) igb_release_hw_control(adapter); #ifdef CONFIG_PCI_IOV - rtnl_lock(); igb_disable_sriov(pdev); - rtnl_unlock(); #endif unregister_netdev(netdev); -- cgit From 50f303496d92e25b79bdfb73e3707ad0684ad67f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 22 Nov 2022 18:28:03 +0900 Subject: igb: Enable SR-IOV after reinit Enabling SR-IOV causes the virtual functions to make requests to the PF via the mailbox. Notably, E1000_VF_RESET request will happen during the initialization of the VF. However, unless the reinit is done, the VMMB interrupt, which delivers mailbox interrupt from VF to PF will be kept masked and such requests will be silently ignored. Enable SR-IOV at the very end of the procedure to configure the device for SR-IOV so that the PF is configured properly for SR-IOV when a VF is activated. Fixes: fa44f2f185f7 ("igb: Enable SR-IOV configuration via PCI sysfs interface") Signed-off-by: Akihiko Odaki Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 135 +++++++++++++----------------- 1 file changed, 58 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 5532361b0e94..274c781b5547 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -109,6 +109,7 @@ static void igb_free_all_rx_resources(struct igb_adapter *); static void igb_setup_mrqc(struct igb_adapter *); static int igb_probe(struct pci_dev *, const struct pci_device_id *); static void igb_remove(struct pci_dev *pdev); +static void igb_init_queue_configuration(struct igb_adapter *adapter); static int igb_sw_init(struct igb_adapter *); int igb_open(struct net_device *); int igb_close(struct net_device *); @@ -175,9 +176,7 @@ static void igb_nfc_filter_restore(struct igb_adapter *adapter); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); -static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs); -static int igb_disable_sriov(struct pci_dev *dev); -static int igb_pci_disable_sriov(struct pci_dev *dev); +static int igb_disable_sriov(struct pci_dev *dev, bool reinit); #endif static int igb_suspend(struct device *); @@ -3665,7 +3664,7 @@ err_sw_init: kfree(adapter->shadow_vfta); igb_clear_interrupt_scheme(adapter); #ifdef CONFIG_PCI_IOV - igb_disable_sriov(pdev); + igb_disable_sriov(pdev, false); #endif pci_iounmap(pdev, adapter->io_addr); err_ioremap: @@ -3679,7 +3678,38 @@ err_dma: } #ifdef CONFIG_PCI_IOV -static int igb_disable_sriov(struct pci_dev *pdev) +static int igb_sriov_reinit(struct pci_dev *dev) +{ + struct net_device *netdev = pci_get_drvdata(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + + rtnl_lock(); + + if (netif_running(netdev)) + igb_close(netdev); + else + igb_reset(adapter); + + igb_clear_interrupt_scheme(adapter); + + igb_init_queue_configuration(adapter); + + if (igb_init_interrupt_scheme(adapter, true)) { + rtnl_unlock(); + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + if (netif_running(netdev)) + igb_open(netdev); + + rtnl_unlock(); + + return 0; +} + +static int igb_disable_sriov(struct pci_dev *pdev, bool reinit) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); @@ -3713,10 +3743,10 @@ static int igb_disable_sriov(struct pci_dev *pdev) adapter->flags |= IGB_FLAG_DMAC; } - return 0; + return reinit ? igb_sriov_reinit(pdev) : 0; } -static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) +static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs, bool reinit) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); @@ -3781,12 +3811,6 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) "Unable to allocate memory for VF MAC filter list\n"); } - /* only call pci_enable_sriov() if no VFs are allocated already */ - if (!old_vfs) { - err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); - if (err) - goto err_out; - } dev_info(&pdev->dev, "%d VFs allocated\n", adapter->vfs_allocated_count); for (i = 0; i < adapter->vfs_allocated_count; i++) @@ -3794,6 +3818,17 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) /* DMA Coalescing is not supported in IOV mode. */ adapter->flags &= ~IGB_FLAG_DMAC; + + if (reinit) { + err = igb_sriov_reinit(pdev); + if (err) + goto err_out; + } + + /* only call pci_enable_sriov() if no VFs are allocated already */ + if (!old_vfs) + err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); + goto out; err_out: @@ -3863,7 +3898,7 @@ static void igb_remove(struct pci_dev *pdev) igb_release_hw_control(adapter); #ifdef CONFIG_PCI_IOV - igb_disable_sriov(pdev); + igb_disable_sriov(pdev, false); #endif unregister_netdev(netdev); @@ -3909,7 +3944,7 @@ static void igb_probe_vfs(struct igb_adapter *adapter) igb_reset_interrupt_capability(adapter); pci_sriov_set_totalvfs(pdev, 7); - igb_enable_sriov(pdev, max_vfs); + igb_enable_sriov(pdev, max_vfs, false); #endif /* CONFIG_PCI_IOV */ } @@ -9518,71 +9553,17 @@ static void igb_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PCI_IOV -static int igb_sriov_reinit(struct pci_dev *dev) -{ - struct net_device *netdev = pci_get_drvdata(dev); - struct igb_adapter *adapter = netdev_priv(netdev); - struct pci_dev *pdev = adapter->pdev; - - rtnl_lock(); - - if (netif_running(netdev)) - igb_close(netdev); - else - igb_reset(adapter); - - igb_clear_interrupt_scheme(adapter); - - igb_init_queue_configuration(adapter); - - if (igb_init_interrupt_scheme(adapter, true)) { - rtnl_unlock(); - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); - return -ENOMEM; - } - - if (netif_running(netdev)) - igb_open(netdev); - - rtnl_unlock(); - - return 0; -} - -static int igb_pci_disable_sriov(struct pci_dev *dev) -{ - int err = igb_disable_sriov(dev); - - if (!err) - err = igb_sriov_reinit(dev); - - return err; -} - -static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs) -{ - int err = igb_enable_sriov(dev, num_vfs); - - if (err) - goto out; - - err = igb_sriov_reinit(dev); - if (!err) - return num_vfs; - -out: - return err; -} - -#endif static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) { #ifdef CONFIG_PCI_IOV - if (num_vfs == 0) - return igb_pci_disable_sriov(dev); - else - return igb_pci_enable_sriov(dev, num_vfs); + int err; + + if (num_vfs == 0) { + return igb_disable_sriov(dev, true); + } else { + err = igb_enable_sriov(dev, num_vfs, true); + return err ? err : num_vfs; + } #endif return 0; } -- cgit From 85eb39bb39cbb5c086df1e19ba67cc1366693a77 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Tue, 22 Nov 2022 10:28:52 +0800 Subject: intel/igbvf: free irq on the error path in igbvf_request_msix() In igbvf_request_msix(), irqs have not been freed on the err path, we need to free it. Fix it. Fixes: d4e0fe01a38a ("igbvf: add new driver to support 82576 virtual functions") Signed-off-by: Gaosheng Cui Reviewed-by: Maciej Fijalkowski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/netdev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 3a32809510fc..72cb1b56e9f2 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1074,7 +1074,7 @@ static int igbvf_request_msix(struct igbvf_adapter *adapter) igbvf_intr_msix_rx, 0, adapter->rx_ring->name, netdev); if (err) - goto out; + goto free_irq_tx; adapter->rx_ring->itr_register = E1000_EITR(vector); adapter->rx_ring->itr_val = adapter->current_itr; @@ -1083,10 +1083,14 @@ static int igbvf_request_msix(struct igbvf_adapter *adapter) err = request_irq(adapter->msix_entries[vector].vector, igbvf_msix_other, 0, netdev->name, netdev); if (err) - goto out; + goto free_irq_rx; igbvf_configure_msix(adapter); return 0; +free_irq_rx: + free_irq(adapter->msix_entries[--vector].vector, netdev); +free_irq_tx: + free_irq(adapter->msix_entries[--vector].vector, netdev); out: return err; } -- cgit From 02c83791ef969c6a8a150b4927193d0d0e50fb23 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 1 Dec 2022 19:20:03 +0900 Subject: igbvf: Regard vf reset nack as success vf reset nack actually represents the reset operation itself is performed but no address is assigned. Therefore, e1000_reset_hw_vf should fill the "perm_addr" with the zero address and return success on such an occasion. This prevents its callers in netdev.c from saying PF still resetting, and instead allows them to correctly report that no address is assigned. Fixes: 6ddbc4cf1f4d ("igb: Indicate failure on vf reset for empty mac address") Signed-off-by: Akihiko Odaki Reviewed-by: Leon Romanovsky Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/vf.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c index b8ba3f94c363..a47a2e3e548c 100644 --- a/drivers/net/ethernet/intel/igbvf/vf.c +++ b/drivers/net/ethernet/intel/igbvf/vf.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2009 - 2018 Intel Corporation. */ +#include + #include "vf.h" static s32 e1000_check_for_link_vf(struct e1000_hw *hw); @@ -131,11 +133,16 @@ static s32 e1000_reset_hw_vf(struct e1000_hw *hw) /* set our "perm_addr" based on info provided by PF */ ret_val = mbx->ops.read_posted(hw, msgbuf, 3); if (!ret_val) { - if (msgbuf[0] == (E1000_VF_RESET | - E1000_VT_MSGTYPE_ACK)) + switch (msgbuf[0]) { + case E1000_VF_RESET | E1000_VT_MSGTYPE_ACK: memcpy(hw->mac.perm_addr, addr, ETH_ALEN); - else + break; + case E1000_VF_RESET | E1000_VT_MSGTYPE_NACK: + eth_zero_addr(hw->mac.perm_addr); + break; + default: ret_val = -E1000_ERR_MAC_INIT; + } } } -- cgit From 2b4cc3d3f4d8ec42961e98568a0afeee96a943ab Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Tue, 7 Mar 2023 15:45:31 +0900 Subject: igc: fix the validation logic for taprio's gate list The check introduced in the commit a5fd39464a40 ("igc: Lift TAPRIO schedule restriction") can detect a false positive error in some corner case. For instance, tc qdisc replace ... taprio num_tc 4 ... sched-entry S 0x01 100000 # slot#1 sched-entry S 0x03 100000 # slot#2 sched-entry S 0x04 100000 # slot#3 sched-entry S 0x08 200000 # slot#4 flags 0x02 # hardware offload Here the queue#0 (the first queue) is on at the slot#1 and #2, and off at the slot#3 and #4. Under the current logic, when the slot#4 is examined, validate_schedule() returns *false* since the enablement count for the queue#0 is two and it is already off at the previous slot (i.e. #3). But this definition is truely correct. Let's fix the logic to enforce a strict validation for consecutively-opened slots. Fixes: a5fd39464a40 ("igc: Lift TAPRIO schedule restriction") Signed-off-by: AKASHI Takahiro Reviewed-by: Kurt Kanzenbach Acked-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2928a6c73692..25fc6c65209b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6010,18 +6010,18 @@ static bool validate_schedule(struct igc_adapter *adapter, if (e->command != TC_TAPRIO_CMD_SET_GATES) return false; - for (i = 0; i < adapter->num_tx_queues; i++) { - if (e->gate_mask & BIT(i)) + for (i = 0; i < adapter->num_tx_queues; i++) + if (e->gate_mask & BIT(i)) { queue_uses[i]++; - /* There are limitations: A single queue cannot be - * opened and closed multiple times per cycle unless the - * gate stays open. Check for it. - */ - if (queue_uses[i] > 1 && - !(prev->gate_mask & BIT(i))) - return false; - } + /* There are limitations: A single queue cannot + * be opened and closed multiple times per cycle + * unless the gate stays open. Check for it. + */ + if (queue_uses[i] > 1 && + !(prev->gate_mask & BIT(i))) + return false; + } } return true; -- cgit From 6de4b1ab470fe52351415217ac6dffddee571c45 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 10 Mar 2023 13:42:08 -0800 Subject: xfs: try to idiot-proof the allocators In porting his development branch to 6.3-rc1, yours truly has repeatedly screwed up the args->pag being fed to the xfs_alloc_vextent* functions. Add some debugging assertions to test the preconditions required of the callers. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6a037173d20d..8999e38e1bed 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3279,6 +3279,9 @@ xfs_alloc_vextent_this_ag( xfs_agnumber_t minimum_agno; int error; + ASSERT(args->pag != NULL); + ASSERT(args->pag->pag_agno == agno); + args->agno = agno; args->agbno = 0; error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0), @@ -3394,6 +3397,8 @@ xfs_alloc_vextent_start_ag( bool bump_rotor = false; int error; + ASSERT(args->pag == NULL); + args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); @@ -3442,6 +3447,8 @@ xfs_alloc_vextent_first_ag( xfs_agnumber_t start_agno; int error; + ASSERT(args->pag == NULL); + args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); @@ -3470,6 +3477,9 @@ xfs_alloc_vextent_exact_bno( xfs_agnumber_t minimum_agno; int error; + ASSERT(args->pag != NULL); + ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target)); + args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); @@ -3502,6 +3512,9 @@ xfs_alloc_vextent_near_bno( bool needs_perag = args->pag == NULL; int error; + if (!needs_perag) + ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target)); + args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); -- cgit From 077706165717686a2a6a71405fef036cd5b37ae0 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 14 Mar 2023 14:05:48 +0300 Subject: virtio/vsock: don't use skbuff state to account credit 'skb->len' can vary when we partially read the data, this complicates the calculation of credit to be updated in 'virtio_transport_inc_rx_pkt()/ virtio_transport_dec_rx_pkt()'. Also in 'virtio_transport_dec_rx_pkt()' we were miscalculating the credit since 'skb->len' was redundant. For these reasons, let's replace the use of skbuff state to calculate new 'rx_bytes'/'fwd_cnt' values with explicit value as input argument. This makes code more simple, because it is not needed to change skbuff state before each call to update 'rx_bytes'/'fwd_cnt'. Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Bobby Eshleman Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a1581c77cf84..618680fd9906 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -241,21 +241,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, } static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, - struct sk_buff *skb) + u32 len) { - if (vvs->rx_bytes + skb->len > vvs->buf_alloc) + if (vvs->rx_bytes + len > vvs->buf_alloc) return false; - vvs->rx_bytes += skb->len; + vvs->rx_bytes += len; return true; } static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, - struct sk_buff *skb) + u32 len) { - int len; - - len = skb_headroom(skb) - sizeof(struct virtio_vsock_hdr) - skb->len; vvs->rx_bytes -= len; vvs->fwd_cnt += len; } @@ -388,7 +385,9 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, skb_pull(skb, bytes); if (skb->len == 0) { - virtio_transport_dec_rx_pkt(vvs, skb); + u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); + + virtio_transport_dec_rx_pkt(vvs, pkt_len); consume_skb(skb); } else { __skb_queue_head(&vvs->rx_queue, skb); @@ -437,17 +436,17 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, while (!msg_ready) { struct virtio_vsock_hdr *hdr; + size_t pkt_len; skb = __skb_dequeue(&vvs->rx_queue); if (!skb) break; hdr = virtio_vsock_hdr(skb); + pkt_len = (size_t)le32_to_cpu(hdr->len); if (dequeued_len >= 0) { - size_t pkt_len; size_t bytes_to_copy; - pkt_len = (size_t)le32_to_cpu(hdr->len); bytes_to_copy = min(user_buf_len, pkt_len); if (bytes_to_copy) { @@ -484,7 +483,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, msg->msg_flags |= MSG_EOR; } - virtio_transport_dec_rx_pkt(vvs, skb); + virtio_transport_dec_rx_pkt(vvs, pkt_len); kfree_skb(skb); } @@ -1040,7 +1039,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); - can_enqueue = virtio_transport_inc_rx_pkt(vvs, skb); + can_enqueue = virtio_transport_inc_rx_pkt(vvs, len); if (!can_enqueue) { free_pkt = true; goto out; -- cgit From 6825e6b4f8e53799d83bc39ca6ec5baed4e2adde Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 14 Mar 2023 14:06:53 +0300 Subject: virtio/vsock: remove redundant 'skb_pull()' call Since we now no longer use 'skb->len' to update credit, there is no sense to update skbuff state, because it is used only once after dequeue to copy data and then will be released. Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Bobby Eshleman Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 618680fd9906..9a411475e201 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -465,7 +465,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, dequeued_len = err; } else { user_buf_len -= bytes_to_copy; - skb_pull(skb, bytes_to_copy); } spin_lock_bh(&vvs->rx_lock); -- cgit From 8daaf39f7f6ef53a11817f6a11ec104016c3545f Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 14 Mar 2023 14:08:20 +0300 Subject: virtio/vsock: don't drop skbuff on copy failure This returns behaviour of SOCK_STREAM read as before skbuff usage. When copying to user fails current skbuff won't be dropped, but returned to sockets's queue. Technically instead of 'skb_dequeue()', 'skb_peek()' is called and when skbuff becomes empty, it is removed from queue by '__skb_unlink()'. Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Bobby Eshleman Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 9a411475e201..6564192e7f20 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -364,7 +364,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); while (total < len && !skb_queue_empty(&vvs->rx_queue)) { - skb = __skb_dequeue(&vvs->rx_queue); + skb = skb_peek(&vvs->rx_queue); bytes = len - total; if (bytes > skb->len) @@ -388,9 +388,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); virtio_transport_dec_rx_pkt(vvs, pkt_len); + __skb_unlink(skb, &vvs->rx_queue); consume_skb(skb); - } else { - __skb_queue_head(&vvs->rx_queue, skb); } } -- cgit From 7e699d2a4e8104d304e921ac5e0a0c73f0f7b623 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Tue, 14 Mar 2023 14:09:27 +0300 Subject: test/vsock: copy to user failure test This adds SOCK_STREAM and SOCK_SEQPACKET tests for invalid buffer case. It tries to read data to NULL buffer (data already presents in socket's queue), then uses valid buffer. For SOCK_STREAM second read must return data, because skbuff is not dropped, but for SOCK_SEQPACKET skbuff will be dropped by kernel, and 'recv()' will return EAGAIN. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- tools/testing/vsock/vsock_test.c | 118 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 67e9f9df3a8c..3de10dbb50f5 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -860,6 +860,114 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) close(fd); } +#define INV_BUF_TEST_DATA_LEN 512 + +static void test_inv_buf_client(const struct test_opts *opts, bool stream) +{ + unsigned char data[INV_BUF_TEST_DATA_LEN] = {0}; + ssize_t ret; + int fd; + + if (stream) + fd = vsock_stream_connect(opts->peer_cid, 1234); + else + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SENDDONE"); + + /* Use invalid buffer here. */ + ret = recv(fd, NULL, sizeof(data), 0); + if (ret != -1) { + fprintf(stderr, "expected recv(2) failure, got %zi\n", ret); + exit(EXIT_FAILURE); + } + + if (errno != ENOMEM) { + fprintf(stderr, "unexpected recv(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } + + ret = recv(fd, data, sizeof(data), MSG_DONTWAIT); + + if (stream) { + /* For SOCK_STREAM we must continue reading. */ + if (ret != sizeof(data)) { + fprintf(stderr, "expected recv(2) success, got %zi\n", ret); + exit(EXIT_FAILURE); + } + /* Don't check errno in case of success. */ + } else { + /* For SOCK_SEQPACKET socket's queue must be empty. */ + if (ret != -1) { + fprintf(stderr, "expected recv(2) failure, got %zi\n", ret); + exit(EXIT_FAILURE); + } + + if (errno != EAGAIN) { + fprintf(stderr, "unexpected recv(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } + } + + control_writeln("DONE"); + + close(fd); +} + +static void test_inv_buf_server(const struct test_opts *opts, bool stream) +{ + unsigned char data[INV_BUF_TEST_DATA_LEN] = {0}; + ssize_t res; + int fd; + + if (stream) + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + else + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + res = send(fd, data, sizeof(data), 0); + if (res != sizeof(data)) { + fprintf(stderr, "unexpected send(2) result %zi\n", res); + exit(EXIT_FAILURE); + } + + control_writeln("SENDDONE"); + + control_expectln("DONE"); + + close(fd); +} + +static void test_stream_inv_buf_client(const struct test_opts *opts) +{ + test_inv_buf_client(opts, true); +} + +static void test_stream_inv_buf_server(const struct test_opts *opts) +{ + test_inv_buf_server(opts, true); +} + +static void test_seqpacket_inv_buf_client(const struct test_opts *opts) +{ + test_inv_buf_client(opts, false); +} + +static void test_seqpacket_inv_buf_server(const struct test_opts *opts) +{ + test_inv_buf_server(opts, false); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -920,6 +1028,16 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_bigmsg_client, .run_server = test_seqpacket_bigmsg_server, }, + { + .name = "SOCK_STREAM test invalid buffer", + .run_client = test_stream_inv_buf_client, + .run_server = test_stream_inv_buf_server, + }, + { + .name = "SOCK_SEQPACKET test invalid buffer", + .run_client = test_seqpacket_inv_buf_client, + .run_server = test_seqpacket_inv_buf_server, + }, {}, }; -- cgit From b830c9642386867863ac64295185f896ff2928ac Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 14 Mar 2023 10:45:43 -0700 Subject: ice: xsk: disable txq irq before flushing hw ice_qp_dis() intends to stop a given queue pair that is a target of xsk pool attach/detach. One of the steps is to disable interrupts on these queues. It currently is broken in a way that txq irq is turned off *after* HW flush which in turn takes no effect. ice_qp_dis(): -> ice_qvec_dis_irq() --> disable rxq irq --> flush hw -> ice_vsi_stop_tx_ring() -->disable txq irq Below splat can be triggered by following steps: - start xdpsock WITHOUT loading xdp prog - run xdp_rxq_info with XDP_TX action on this interface - start traffic - terminate xdpsock [ 256.312485] BUG: kernel NULL pointer dereference, address: 0000000000000018 [ 256.319560] #PF: supervisor read access in kernel mode [ 256.324775] #PF: error_code(0x0000) - not-present page [ 256.329994] PGD 0 P4D 0 [ 256.332574] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 256.337006] CPU: 3 PID: 32 Comm: ksoftirqd/3 Tainted: G OE 6.2.0-rc5+ #51 [ 256.345218] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [ 256.355807] RIP: 0010:ice_clean_rx_irq_zc+0x9c/0x7d0 [ice] [ 256.361423] Code: b7 8f 8a 00 00 00 66 39 ca 0f 84 f1 04 00 00 49 8b 47 40 4c 8b 24 d0 41 0f b7 45 04 66 25 ff 3f 66 89 04 24 0f 84 85 02 00 00 <49> 8b 44 24 18 0f b7 14 24 48 05 00 01 00 00 49 89 04 24 49 89 44 [ 256.380463] RSP: 0018:ffffc900088bfd20 EFLAGS: 00010206 [ 256.385765] RAX: 000000000000003c RBX: 0000000000000035 RCX: 000000000000067f [ 256.393012] RDX: 0000000000000775 RSI: 0000000000000000 RDI: ffff8881deb3ac80 [ 256.400256] RBP: 000000000000003c R08: ffff889847982710 R09: 0000000000010000 [ 256.407500] R10: ffffffff82c060c0 R11: 0000000000000004 R12: 0000000000000000 [ 256.414746] R13: ffff88811165eea0 R14: ffffc9000d255000 R15: ffff888119b37600 [ 256.421990] FS: 0000000000000000(0000) GS:ffff8897e0cc0000(0000) knlGS:0000000000000000 [ 256.430207] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 256.436036] CR2: 0000000000000018 CR3: 0000000005c0a006 CR4: 00000000007706e0 [ 256.443283] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 256.450527] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 256.457770] PKRU: 55555554 [ 256.460529] Call Trace: [ 256.463015] [ 256.465157] ? ice_xmit_zc+0x6e/0x150 [ice] [ 256.469437] ice_napi_poll+0x46d/0x680 [ice] [ 256.473815] ? _raw_spin_unlock_irqrestore+0x1b/0x40 [ 256.478863] __napi_poll+0x29/0x160 [ 256.482409] net_rx_action+0x136/0x260 [ 256.486222] __do_softirq+0xe8/0x2e5 [ 256.489853] ? smpboot_thread_fn+0x2c/0x270 [ 256.494108] run_ksoftirqd+0x2a/0x50 [ 256.497747] smpboot_thread_fn+0x1c1/0x270 [ 256.501907] ? __pfx_smpboot_thread_fn+0x10/0x10 [ 256.506594] kthread+0xea/0x120 [ 256.509785] ? __pfx_kthread+0x10/0x10 [ 256.513597] ret_from_fork+0x29/0x50 [ 256.517238] In fact, irqs were not disabled and napi managed to be scheduled and run while xsk_pool pointer was still valid, but SW ring of xdp_buff pointers was already freed. To fix this, call ice_qvec_dis_irq() after ice_vsi_stop_tx_ring(). Also while at it, remove redundant ice_clean_rx_ring() call - this is handled in ice_qp_clean_rings(). Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Reviewed-by: Larysa Zaremba Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: John Fastabend Signed-off-by: Tony Nguyen Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_xsk.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 31565bbafa22..d1e489da7363 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -184,8 +184,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) } netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); if (err) @@ -200,10 +198,11 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_clean_rx_ring(rx_ring); ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); -- cgit From d3aa3e060c4a80827eb801fc448debc9daa7c46b Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 16 Mar 2023 14:55:06 +0800 Subject: dm stats: check for and propagate alloc_percpu failure Check alloc_precpu()'s return value and return an error from dm_stats_init() if it fails. Update alloc_dev() to fail if dm_stats_init() does. Otherwise, a NULL pointer dereference will occur in dm_stats_cleanup() even if dm-stats isn't being actively used. Fixes: fd2ed4d25270 ("dm: add statistics support") Cc: stable@vger.kernel.org Signed-off-by: Jiasheng Jiang Signed-off-by: Mike Snitzer --- drivers/md/dm-stats.c | 7 ++++++- drivers/md/dm-stats.h | 2 +- drivers/md/dm.c | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index c21a19ab73f7..db2d997a6c18 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -188,7 +188,7 @@ static int dm_stat_in_flight(struct dm_stat_shared *shared) atomic_read(&shared->in_flight[WRITE]); } -void dm_stats_init(struct dm_stats *stats) +int dm_stats_init(struct dm_stats *stats) { int cpu; struct dm_stats_last_position *last; @@ -197,11 +197,16 @@ void dm_stats_init(struct dm_stats *stats) INIT_LIST_HEAD(&stats->list); stats->precise_timestamps = false; stats->last = alloc_percpu(struct dm_stats_last_position); + if (!stats->last) + return -ENOMEM; + for_each_possible_cpu(cpu) { last = per_cpu_ptr(stats->last, cpu); last->last_sector = (sector_t)ULLONG_MAX; last->last_rw = UINT_MAX; } + + return 0; } void dm_stats_cleanup(struct dm_stats *stats) diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h index 0bc152c8e4f3..c6728c8b4159 100644 --- a/drivers/md/dm-stats.h +++ b/drivers/md/dm-stats.h @@ -21,7 +21,7 @@ struct dm_stats_aux { unsigned long long duration_ns; }; -void dm_stats_init(struct dm_stats *st); +int dm_stats_init(struct dm_stats *st); void dm_stats_cleanup(struct dm_stats *st); struct mapped_device; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index eace45a18d45..b6ace995b9ca 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2097,7 +2097,9 @@ static struct mapped_device *alloc_dev(int minor) if (!md->pending_io) goto bad; - dm_stats_init(&md->stats); + r = dm_stats_init(&md->stats); + if (r < 0) + goto bad; /* Populate the mapping, nobody knows we exist yet */ spin_lock(&_minor_lock); -- cgit From 636e8adf7878eab3614250234341bde45537f47a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Mar 2023 20:24:04 +0200 Subject: net: dsa: don't error out when drivers return ETH_DATA_LEN in .port_max_mtu() Currently, when dsa_slave_change_mtu() is called on a user port where dev->max_mtu is 1500 (as returned by ds->ops->port_max_mtu()), the code will stumble upon this check: if (new_master_mtu > mtu_limit) return -ERANGE; because new_master_mtu is adjusted for the tagger overhead but mtu_limit is not. But it would be good if the logic went through, for example if the DSA master really depends on an MTU adjustment to accept DSA-tagged frames. To make the code pass through the check, we need to adjust mtu_limit for the overhead as well, if the minimum restriction was caused by the DSA user port's MTU (dev->max_mtu). A DSA user port MTU and a DSA master MTU are always offset by the protocol overhead. Currently no drivers return 1500 .port_max_mtu(), but this is only temporary and a bug in itself - mv88e6xxx should have done that, but since commit b9c587fed61c ("dsa: mv88e6xxx: Include tagger overhead when setting MTU for DSA and CPU ports") it no longer does. This is a preparation for fixing that. Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6957971c2db2..cac17183589f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1933,6 +1933,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) int new_master_mtu; int old_master_mtu; int mtu_limit; + int overhead; int cpu_mtu; int err; @@ -1961,9 +1962,10 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) largest_mtu = slave_mtu; } - mtu_limit = min_t(int, master->max_mtu, dev->max_mtu); + overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops); + mtu_limit = min_t(int, master->max_mtu, dev->max_mtu + overhead); old_master_mtu = master->mtu; - new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops); + new_master_mtu = largest_mtu + overhead; if (new_master_mtu > mtu_limit) return -ERANGE; @@ -1998,8 +2000,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) out_port_failed: if (new_master_mtu != old_master_mtu) - dsa_port_mtu_change(cpu_dp, old_master_mtu - - dsa_tag_protocol_overhead(cpu_dp->tag_ops)); + dsa_port_mtu_change(cpu_dp, old_master_mtu - overhead); out_cpu_failed: if (new_master_mtu != old_master_mtu) dev_set_mtu(master, old_master_mtu); -- cgit From 7e9517375a14f44ee830ca1c3278076dd65fcc8f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 14 Mar 2023 20:24:05 +0200 Subject: net: dsa: mv88e6xxx: fix max_mtu of 1492 on 6165, 6191, 6220, 6250, 6290 There are 3 classes of switch families that the driver is aware of, as far as mv88e6xxx_change_mtu() is concerned: - MTU configuration is available per port. Here, the chip->info->ops->port_set_jumbo_size() method will be present. - MTU configuration is global to the switch. Here, the chip->info->ops->set_max_frame_size() method will be present. - We don't know how to change the MTU. Here, none of the above methods will be present. Switch families MV88E6165, MV88E6191, MV88E6220, MV88E6250 and MV88E6290 fall in category 3. The blamed commit has adjusted the MTU for all 3 categories by EDSA_HLEN (8 bytes), resulting in a new maximum MTU of 1492 being reported by the driver for these switches. I don't have the hardware to test, but I do have a MV88E6390 switch on which I can simulate this by commenting out its .port_set_jumbo_size definition from mv88e6390_ops. The result is this set of messages at probe time: mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 1 mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 2 mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 3 mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 4 mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 5 mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 6 mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 7 mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 8 It is highly implausible that there exist Ethernet switches which don't support the standard MTU of 1500 octets, and this is what the DSA framework says as well - the error comes from dsa_slave_create() -> dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN). But the error messages are alarming, and it would be good to suppress them. As a consequence of this unlikeliness, we reimplement mv88e6xxx_get_max_mtu() and mv88e6xxx_change_mtu() on switches from the 3rd category as follows: the maximum supported MTU is 1500, and any request to set the MTU to a value larger than that fails in dev_validate_mtu(). Fixes: b9c587fed61c ("dsa: mv88e6xxx: Include tagger overhead when setting MTU for DSA and CPU ports") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0a5d6c7bb128..30383c4f8fd0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3549,7 +3549,7 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port) return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; else if (chip->info->ops->set_max_frame_size) return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; - return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; + return ETH_DATA_LEN; } static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) @@ -3557,6 +3557,17 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) struct mv88e6xxx_chip *chip = ds->priv; int ret = 0; + /* For families where we don't know how to alter the MTU, + * just accept any value up to ETH_DATA_LEN + */ + if (!chip->info->ops->port_set_jumbo_size && + !chip->info->ops->set_max_frame_size) { + if (new_mtu > ETH_DATA_LEN) + return -EINVAL; + + return 0; + } + if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) new_mtu += EDSA_HLEN; @@ -3565,9 +3576,6 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu); else if (chip->info->ops->set_max_frame_size) ret = chip->info->ops->set_max_frame_size(chip, new_mtu); - else - if (new_mtu > 1522) - ret = -EINVAL; mv88e6xxx_reg_unlock(chip); return ret; -- cgit From 1a87e641d8a50c30b63b1d90819bc607b4327596 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 14 Mar 2023 14:18:27 -0500 Subject: net: Use of_property_read_bool() for boolean properties It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. Convert reading boolean properties to of_property_read_bool(). Reviewed-by: Simon Horman Acked-by: Marc Kleine-Budde # for net/can Acked-by: Kalle Valo Acked-by: Nicolas Ferre Acked-by: Francois Romieu Reviewed-by: Wei Fang Signed-off-by: Rob Herring Signed-off-by: David S. Miller --- drivers/net/can/cc770/cc770_platform.c | 12 ++++++------ drivers/net/ethernet/cadence/macb_main.c | 2 +- drivers/net/ethernet/davicom/dm9000.c | 4 ++-- drivers/net/ethernet/freescale/fec_main.c | 2 +- drivers/net/ethernet/freescale/fec_mpc52xx.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 4 ++-- drivers/net/ethernet/ibm/emac/core.c | 8 ++++---- drivers/net/ethernet/ibm/emac/rgmii.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 3 +-- drivers/net/ethernet/sun/niu.c | 2 +- drivers/net/ethernet/ti/cpsw-phy-sel.c | 3 +-- drivers/net/ethernet/ti/netcp_ethss.c | 8 +++----- drivers/net/ethernet/via/via-velocity.c | 3 +-- drivers/net/ethernet/via/via-velocity.h | 2 +- drivers/net/ethernet/xilinx/ll_temac_main.c | 9 ++++----- drivers/net/wan/fsl_ucc_hdlc.c | 11 +++-------- drivers/net/wireless/ti/wlcore/spi.c | 3 +-- net/ncsi/ncsi-manage.c | 4 ++-- 18 files changed, 36 insertions(+), 48 deletions(-) diff --git a/drivers/net/can/cc770/cc770_platform.c b/drivers/net/can/cc770/cc770_platform.c index 8d916e2ee6c2..8dcc32e4e30e 100644 --- a/drivers/net/can/cc770/cc770_platform.c +++ b/drivers/net/can/cc770/cc770_platform.c @@ -93,20 +93,20 @@ static int cc770_get_of_node_data(struct platform_device *pdev, if (priv->can.clock.freq > 8000000) priv->cpu_interface |= CPUIF_DMC; - if (of_get_property(np, "bosch,divide-memory-clock", NULL)) + if (of_property_read_bool(np, "bosch,divide-memory-clock")) priv->cpu_interface |= CPUIF_DMC; - if (of_get_property(np, "bosch,iso-low-speed-mux", NULL)) + if (of_property_read_bool(np, "bosch,iso-low-speed-mux")) priv->cpu_interface |= CPUIF_MUX; if (!of_get_property(np, "bosch,no-comperator-bypass", NULL)) priv->bus_config |= BUSCFG_CBY; - if (of_get_property(np, "bosch,disconnect-rx0-input", NULL)) + if (of_property_read_bool(np, "bosch,disconnect-rx0-input")) priv->bus_config |= BUSCFG_DR0; - if (of_get_property(np, "bosch,disconnect-rx1-input", NULL)) + if (of_property_read_bool(np, "bosch,disconnect-rx1-input")) priv->bus_config |= BUSCFG_DR1; - if (of_get_property(np, "bosch,disconnect-tx1-output", NULL)) + if (of_property_read_bool(np, "bosch,disconnect-tx1-output")) priv->bus_config |= BUSCFG_DT1; - if (of_get_property(np, "bosch,polarity-dominant", NULL)) + if (of_property_read_bool(np, "bosch,polarity-dominant")) priv->bus_config |= BUSCFG_POL; prop = of_get_property(np, "bosch,clock-out-frequency", &prop_size); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6e141a8bbf43..66e30561569e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4990,7 +4990,7 @@ static int macb_probe(struct platform_device *pdev) bp->jumbo_max_len = macb_config->jumbo_max_len; bp->wol = 0; - if (of_get_property(np, "magic-packet", NULL)) + if (of_property_read_bool(np, "magic-packet")) bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; device_set_wakeup_capable(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index b21e56de6167..05a89ab6766c 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1393,9 +1393,9 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) if (!pdata) return ERR_PTR(-ENOMEM); - if (of_find_property(np, "davicom,ext-phy", NULL)) + if (of_property_read_bool(np, "davicom,ext-phy")) pdata->flags |= DM9000_PLATF_EXT_PHY; - if (of_find_property(np, "davicom,no-eeprom", NULL)) + if (of_property_read_bool(np, "davicom,no-eeprom")) pdata->flags |= DM9000_PLATF_NO_EEPROM; ret = of_get_mac_address(np, pdata->dev_addr); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c73e25f8995e..f3b16a6673e2 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -4251,7 +4251,7 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_ipc_init; - if (of_get_property(np, "fsl,magic-packet", NULL)) + if (of_property_read_bool(np, "fsl,magic-packet")) fep->wol_flag |= FEC_WOL_HAS_MAGIC_PACKET; ret = fec_enet_init_stop_mode(fep, np); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index a7f4c3c29f3e..b88816b71ddf 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -937,7 +937,7 @@ static int mpc52xx_fec_probe(struct platform_device *op) priv->phy_node = of_parse_phandle(np, "phy-handle", 0); /* the 7-wire property means don't use MII mode */ - if (of_find_property(np, "fsl,7-wire-mode", NULL)) { + if (of_property_read_bool(np, "fsl,7-wire-mode")) { priv->seven_wire_mode = 1; dev_info(&ndev->dev, "using 7-wire PHY mode\n"); } diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index b2def295523a..38d5013c6fed 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -787,10 +787,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) else priv->interface = gfar_get_interface(dev); - if (of_find_property(np, "fsl,magic-packet", NULL)) + if (of_property_read_bool(np, "fsl,magic-packet")) priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET; - if (of_get_property(np, "fsl,wake-on-filer", NULL)) + if (of_property_read_bool(np, "fsl,wake-on-filer")) priv->device_flags |= FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER; priv->phy_node = of_parse_phandle(np, "phy-handle", 0); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 9b08e41ccc29..c97095abd26a 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2939,9 +2939,9 @@ static int emac_init_config(struct emac_instance *dev) } /* Fixup some feature bits based on the device tree */ - if (of_get_property(np, "has-inverted-stacr-oc", NULL)) + if (of_property_read_bool(np, "has-inverted-stacr-oc")) dev->features |= EMAC_FTR_STACR_OC_INVERT; - if (of_get_property(np, "has-new-stacr-staopc", NULL)) + if (of_property_read_bool(np, "has-new-stacr-staopc")) dev->features |= EMAC_FTR_HAS_NEW_STACR; /* CAB lacks the appropriate properties */ @@ -3042,7 +3042,7 @@ static int emac_probe(struct platform_device *ofdev) * property here for now, but new flat device trees should set a * status property to "disabled" instead. */ - if (of_get_property(np, "unused", NULL) || !of_device_is_available(np)) + if (of_property_read_bool(np, "unused") || !of_device_is_available(np)) return -ENODEV; /* Find ourselves in the bootlist if we are there */ @@ -3333,7 +3333,7 @@ static void __init emac_make_bootlist(void) if (of_match_node(emac_match, np) == NULL) continue; - if (of_get_property(np, "unused", NULL)) + if (of_property_read_bool(np, "unused")) continue; idx = of_get_property(np, "cell-index", NULL); if (idx == NULL) diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c index 242ef976fd15..50358cf00130 100644 --- a/drivers/net/ethernet/ibm/emac/rgmii.c +++ b/drivers/net/ethernet/ibm/emac/rgmii.c @@ -242,7 +242,7 @@ static int rgmii_probe(struct platform_device *ofdev) } /* Check for RGMII flags */ - if (of_get_property(ofdev->dev.of_node, "has-mdio", NULL)) + if (of_property_read_bool(ofdev->dev.of_node, "has-mdio")) dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO; /* CAB lacks the right properties, fix this up */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index ac8580f501e2..ac550d1ac015 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -213,8 +213,7 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev) struct device_node *np = dev->of_node; int err = 0; - if (of_get_property(np, "snps,rmii_refclk_ext", NULL)) - dwmac->rmii_refclk_ext = true; + dwmac->rmii_refclk_ext = of_property_read_bool(np, "snps,rmii_refclk_ext"); dwmac->clk_tx = devm_clk_get(dev, "tx"); if (IS_ERR(dwmac->clk_tx)) { diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index e6144d963eaa..ab8b09a9ef61 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9271,7 +9271,7 @@ static int niu_get_of_props(struct niu *np) if (model) strcpy(np->vpd.model, model); - if (of_find_property(dp, "hot-swappable-phy", NULL)) { + if (of_property_read_bool(dp, "hot-swappable-phy")) { np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER | NIU_FLAGS_HOTPLUG_PHY); } diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index e8f38e3f7706..25e707d7b87c 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -226,8 +226,7 @@ static int cpsw_phy_sel_probe(struct platform_device *pdev) if (IS_ERR(priv->gmii_sel)) return PTR_ERR(priv->gmii_sel); - if (of_find_property(pdev->dev.of_node, "rmii-clock-ext", NULL)) - priv->rmii_clock_external = true; + priv->rmii_clock_external = of_property_read_bool(pdev->dev.of_node, "rmii-clock-ext"); dev_set_drvdata(&pdev->dev, priv); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 751fb0bc65c5..2adf82a32bf6 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3583,13 +3583,11 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, /* init the hw stats lock */ spin_lock_init(&gbe_dev->hw_stats_lock); - if (of_find_property(node, "enable-ale", NULL)) { - gbe_dev->enable_ale = true; + gbe_dev->enable_ale = of_property_read_bool(node, "enable-ale"); + if (gbe_dev->enable_ale) dev_info(dev, "ALE enabled\n"); - } else { - gbe_dev->enable_ale = false; + else dev_dbg(dev, "ALE bypass enabled*\n"); - } ret = of_property_read_u32(node, "tx-queue", &gbe_dev->tx_queue_id); diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index a502812ac418..86f7843b4591 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2709,8 +2709,7 @@ static int velocity_get_platform_info(struct velocity_info *vptr) struct resource res; int ret; - if (of_get_property(vptr->dev->of_node, "no-eeprom", NULL)) - vptr->no_eeprom = 1; + vptr->no_eeprom = of_property_read_bool(vptr->dev->of_node, "no-eeprom"); ret = of_address_to_resource(vptr->dev->of_node, 0, &res); if (ret) { diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h index ffdac6fac054..f64ed39b93d8 100644 --- a/drivers/net/ethernet/via/via-velocity.h +++ b/drivers/net/ethernet/via/via-velocity.h @@ -1383,7 +1383,7 @@ struct velocity_info { struct device *dev; struct pci_dev *pdev; struct net_device *netdev; - int no_eeprom; + bool no_eeprom; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; u8 ip_addr[4]; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 1066420d6a83..e0ac1bcd9925 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1455,12 +1455,11 @@ static int temac_probe(struct platform_device *pdev) * endianness mode. Default for OF devices is big-endian. */ little_endian = false; - if (temac_np) { - if (of_get_property(temac_np, "little-endian", NULL)) - little_endian = true; - } else if (pdata) { + if (temac_np) + little_endian = of_property_read_bool(temac_np, "little-endian"); + else if (pdata) little_endian = pdata->reg_little_endian; - } + if (little_endian) { lp->temac_ior = _temac_ior_le; lp->temac_iow = _temac_iow_le; diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 1c53b5546927..47c2ad7a3e42 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1177,14 +1177,9 @@ static int ucc_hdlc_probe(struct platform_device *pdev) uhdlc_priv->dev = &pdev->dev; uhdlc_priv->ut_info = ut_info; - if (of_get_property(np, "fsl,tdm-interface", NULL)) - uhdlc_priv->tsa = 1; - - if (of_get_property(np, "fsl,ucc-internal-loopback", NULL)) - uhdlc_priv->loopback = 1; - - if (of_get_property(np, "fsl,hdlc-bus", NULL)) - uhdlc_priv->hdlc_bus = 1; + uhdlc_priv->tsa = of_property_read_bool(np, "fsl,tdm-interface"); + uhdlc_priv->loopback = of_property_read_bool(np, "fsl,ucc-internal-loopback"); + uhdlc_priv->hdlc_bus = of_property_read_bool(np, "fsl,hdlc-bus"); if (uhdlc_priv->tsa == 1) { utdm = kzalloc(sizeof(*utdm), GFP_KERNEL); diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 2d2edddc77bd..3f88e6a0a510 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -447,8 +447,7 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, dev_info(&spi->dev, "selected chip family is %s\n", pdev_data->family->name); - if (of_find_property(dt_node, "clock-xtal", NULL)) - pdev_data->ref_clock_xtal = true; + pdev_data->ref_clock_xtal = of_property_read_bool(dt_node, "clock-xtal"); /* optional clock frequency params */ of_property_read_u32(dt_node, "ref-clock-frequency", diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 80713febfac6..d9da942ad53d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1803,8 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; - if (np && (of_get_property(np, "mellanox,multi-host", NULL) || - of_get_property(np, "mlx,multi-host", NULL))) + if (np && (of_property_read_bool(np, "mellanox,multi-host") || + of_property_read_bool(np, "mlx,multi-host"))) ndp->mlx_multi_host = true; } -- cgit From fa0f1ba7c8233118b6fdaa65e2f5ded563d3e1fa Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 15 Mar 2023 09:52:22 +0800 Subject: virtio_net: fix page_to_skb() miss headroom Because headroom is not passed to page_to_skb(), this causes the shinfo exceeds the range. Then the frags of shinfo are changed by other process. [ 157.724634] stack segment: 0000 [#1] PREEMPT SMP NOPTI [ 157.725358] CPU: 3 PID: 679 Comm: xdp_pass_user_f Tainted: G E 6.2.0+ #150 [ 157.726401] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/4 [ 157.727820] RIP: 0010:skb_release_data+0x11b/0x180 [ 157.728449] Code: 44 24 02 48 83 c3 01 39 d8 7e be 48 89 d8 48 c1 e0 04 41 80 7d 7e 00 49 8b 6c 04 30 79 0c 48 89 ef e8 89 b [ 157.730751] RSP: 0018:ffffc90000178b48 EFLAGS: 00010202 [ 157.731383] RAX: 0000000000000010 RBX: 0000000000000001 RCX: 0000000000000000 [ 157.732270] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff888100dd0b00 [ 157.733117] RBP: 5d5d76010f6e2408 R08: ffff888100dd0b2c R09: 0000000000000000 [ 157.734013] R10: ffffffff82effd30 R11: 000000000000a14e R12: ffff88810981ffc0 [ 157.734904] R13: ffff888100dd0b00 R14: 0000000000000002 R15: 0000000000002310 [ 157.735793] FS: 00007f06121d9740(0000) GS:ffff88842fcc0000(0000) knlGS:0000000000000000 [ 157.736794] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 157.737522] CR2: 00007ffd9a56c084 CR3: 0000000104bda001 CR4: 0000000000770ee0 [ 157.738420] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 157.739283] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 157.740146] PKRU: 55555554 [ 157.740502] Call Trace: [ 157.740843] [ 157.741117] kfree_skb_reason+0x50/0x120 [ 157.741613] __udp4_lib_rcv+0x52b/0x5e0 [ 157.742132] ip_protocol_deliver_rcu+0xaf/0x190 [ 157.742715] ip_local_deliver_finish+0x77/0xa0 [ 157.743280] ip_sublist_rcv_finish+0x80/0x90 [ 157.743834] ip_list_rcv_finish.constprop.0+0x16f/0x190 [ 157.744493] ip_list_rcv+0x126/0x140 [ 157.744952] __netif_receive_skb_list_core+0x29b/0x2c0 [ 157.745602] __netif_receive_skb_list+0xed/0x160 [ 157.746190] ? udp4_gro_receive+0x275/0x350 [ 157.746732] netif_receive_skb_list_internal+0xf2/0x1b0 [ 157.747398] napi_gro_receive+0xd1/0x210 [ 157.747911] virtnet_receive+0x75/0x1c0 [ 157.748422] virtnet_poll+0x48/0x1b0 [ 157.748878] __napi_poll+0x29/0x1b0 [ 157.749330] net_rx_action+0x27a/0x340 [ 157.749812] __do_softirq+0xf3/0x2fb [ 157.750298] do_softirq+0xa2/0xd0 [ 157.750745] [ 157.751563] [ 157.752329] __local_bh_enable_ip+0x6d/0x80 [ 157.753178] virtnet_xdp_set+0x482/0x860 [ 157.754159] ? __pfx_virtnet_xdp+0x10/0x10 [ 157.755129] dev_xdp_install+0xa4/0xe0 [ 157.756033] dev_xdp_attach+0x20b/0x5e0 [ 157.756933] do_setlink+0x82e/0xc90 [ 157.757777] ? __nla_validate_parse+0x12b/0x1e0 [ 157.758744] rtnl_setlink+0xd8/0x170 [ 157.759549] ? mod_objcg_state+0xcb/0x320 [ 157.760328] ? security_capable+0x37/0x60 [ 157.761209] ? security_capable+0x37/0x60 [ 157.762072] rtnetlink_rcv_msg+0x145/0x3d0 [ 157.762929] ? ___slab_alloc+0x327/0x610 [ 157.763754] ? __alloc_skb+0x141/0x170 [ 157.764533] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 [ 157.765422] netlink_rcv_skb+0x58/0x110 [ 157.766229] netlink_unicast+0x21f/0x330 [ 157.766951] netlink_sendmsg+0x240/0x4a0 [ 157.767654] sock_sendmsg+0x93/0xa0 [ 157.768434] ? sockfd_lookup_light+0x12/0x70 [ 157.769245] __sys_sendto+0xfe/0x170 [ 157.770079] ? handle_mm_fault+0xe9/0x2d0 [ 157.770859] ? preempt_count_add+0x51/0xa0 [ 157.771645] ? up_read+0x3c/0x80 [ 157.772340] ? do_user_addr_fault+0x1e9/0x710 [ 157.773166] ? kvm_read_and_reset_apf_flags+0x49/0x60 [ 157.774087] __x64_sys_sendto+0x29/0x30 [ 157.774856] do_syscall_64+0x3c/0x90 [ 157.775518] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 157.776382] RIP: 0033:0x7f06122def70 Fixes: 18117a842ab0 ("virtio-net: remove xdp related info from page_to_skb()") Signed-off-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1a309cfb4976..8ecf7a341d54 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -446,7 +446,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, - unsigned int len, unsigned int truesize) + unsigned int len, unsigned int truesize, + unsigned int headroom) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -464,11 +465,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - buf = p; + buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; p += hdr_padded_len; - tailroom = truesize - hdr_padded_len - len; + tailroom = truesize - headroom - hdr_padded_len - len; shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -1009,7 +1010,7 @@ static struct sk_buff *receive_big(struct net_device *dev, { struct page *page = buf; struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) @@ -1332,7 +1333,7 @@ err_xdp_frags: rcu_read_unlock(); skip_xdp: - head_skb = page_to_skb(vi, rq, page, offset, len, truesize); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) -- cgit From 1a3bd6eabae35afc5c6dbe2651f21467cf8ad3fd Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 15 Mar 2023 09:52:23 +0800 Subject: virtio_net: free xdp shinfo frags when build_skb_from_xdp_buff() fails build_skb_from_xdp_buff() may return NULL, in this case we need to free the frags of xdp shinfo. Fixes: fab89bafa95b ("virtio-net: support multi-buffer xdp") Signed-off-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Reviewed-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8ecf7a341d54..2396c28c0122 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1273,9 +1273,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, switch (act) { case XDP_PASS: + head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); + if (unlikely(!head_skb)) + goto err_xdp_frags; + if (unlikely(xdp_page != page)) put_page(page); - head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); rcu_read_unlock(); return head_skb; case XDP_TX: -- cgit From 987dd36c0141f6ab9f0fbf14d6b2ec3342dedb2f Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 30 Jan 2023 16:32:46 +0100 Subject: i2c: imx-lpi2c: clean rx/tx buffers upon new message When start sending a new message clear the Rx & Tx buffer pointers in order to avoid using stale pointers. Signed-off-by: Alexander Stein Tested-by: Emanuele Ghidoli Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx-lpi2c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 188f2a36d2fd..c6d0225246e6 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -463,6 +463,8 @@ static int lpi2c_imx_xfer(struct i2c_adapter *adapter, if (num == 1 && msgs[0].len == 0) goto stop; + lpi2c_imx->rx_buf = NULL; + lpi2c_imx->tx_buf = NULL; lpi2c_imx->delivered = 0; lpi2c_imx->msglen = msgs[i].len; init_completion(&lpi2c_imx->complete); -- cgit From 1c7885004567e8951d65a983be095f254dd20bef Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 30 Jan 2023 16:32:47 +0100 Subject: i2c: imx-lpi2c: check only for enabled interrupt flags When reading from I2C, the Tx watermark is set to 0. Unfortunately the TDF (transmit data flag) is enabled when Tx FIFO entries is equal or less than watermark. So it is set in every case, hence the reset default of 1. This results in the MSR_RDF _and_ MSR_TDF flags to be set thus trying to send Tx data on a read message. Mask the IRQ status to filter for wanted flags only. Fixes: a55fa9d0e42e ("i2c: imx-lpi2c: add low power i2c bus driver") Signed-off-by: Alexander Stein Tested-by: Emanuele Ghidoli Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index c6d0225246e6..a49b14d52a98 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -505,10 +505,14 @@ disable: static irqreturn_t lpi2c_imx_isr(int irq, void *dev_id) { struct lpi2c_imx_struct *lpi2c_imx = dev_id; + unsigned int enabled; unsigned int temp; + enabled = readl(lpi2c_imx->base + LPI2C_MIER); + lpi2c_imx_intctrl(lpi2c_imx, 0); temp = readl(lpi2c_imx->base + LPI2C_MSR); + temp &= enabled; if (temp & MSR_RDF) lpi2c_imx_read_rxfifo(lpi2c_imx); -- cgit From 5190417bdf72c71b65bd9892103c6186816a6e8b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 13 Feb 2023 16:25:50 +0100 Subject: i2c: mxs: ensure that DMA buffers are safe for DMA We found that after commit 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems"), the PCF85063 RTC driver stopped working on i.MX28 due to regmap_bulk_read() reading bogus data into a stack buffer. This is caused by the i2c-mxs driver using DMA transfers even for messages without the I2C_M_DMA_SAFE flag, and the aforementioned commit enabling vmapped stacks. As the MXS I2C controller requires DMA for reads of >4 bytes, DMA can't be disabled, so the issue is fixed by using i2c_get_dma_safe_msg_buf() to create a bounce buffer when needed. Fixes: 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems") Signed-off-by: Matthias Schiffer Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mxs.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index d113bed79545..e0f3b3545cfe 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -171,7 +171,7 @@ static void mxs_i2c_dma_irq_callback(void *param) } static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, - struct i2c_msg *msg, uint32_t flags) + struct i2c_msg *msg, u8 *buf, uint32_t flags) { struct dma_async_tx_descriptor *desc; struct mxs_i2c_dev *i2c = i2c_get_adapdata(adap); @@ -226,7 +226,7 @@ static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, } /* Queue the DMA data transfer. */ - sg_init_one(&i2c->sg_io[1], msg->buf, msg->len); + sg_init_one(&i2c->sg_io[1], buf, msg->len); dma_map_sg(i2c->dev, &i2c->sg_io[1], 1, DMA_FROM_DEVICE); desc = dmaengine_prep_slave_sg(i2c->dmach, &i2c->sg_io[1], 1, DMA_DEV_TO_MEM, @@ -259,7 +259,7 @@ static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, /* Queue the DMA data transfer. */ sg_init_table(i2c->sg_io, 2); sg_set_buf(&i2c->sg_io[0], &i2c->addr_data, 1); - sg_set_buf(&i2c->sg_io[1], msg->buf, msg->len); + sg_set_buf(&i2c->sg_io[1], buf, msg->len); dma_map_sg(i2c->dev, i2c->sg_io, 2, DMA_TO_DEVICE); desc = dmaengine_prep_slave_sg(i2c->dmach, i2c->sg_io, 2, DMA_MEM_TO_DEV, @@ -563,6 +563,7 @@ static int mxs_i2c_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, struct mxs_i2c_dev *i2c = i2c_get_adapdata(adap); int ret; int flags; + u8 *dma_buf; int use_pio = 0; unsigned long time_left; @@ -588,13 +589,20 @@ static int mxs_i2c_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, if (ret && (ret != -ENXIO)) mxs_i2c_reset(i2c); } else { + dma_buf = i2c_get_dma_safe_msg_buf(msg, 1); + if (!dma_buf) + return -ENOMEM; + reinit_completion(&i2c->cmd_complete); - ret = mxs_i2c_dma_setup_xfer(adap, msg, flags); - if (ret) + ret = mxs_i2c_dma_setup_xfer(adap, msg, dma_buf, flags); + if (ret) { + i2c_put_dma_safe_msg_buf(dma_buf, msg, false); return ret; + } time_left = wait_for_completion_timeout(&i2c->cmd_complete, msecs_to_jiffies(1000)); + i2c_put_dma_safe_msg_buf(dma_buf, msg, true); if (!time_left) goto timeout; -- cgit From cc9812a3096d1986caca9a23bee99effc45c08df Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 13 Mar 2023 15:45:51 +0800 Subject: i2c: hisi: Avoid redundant interrupts After issuing all the messages we can disable the TX_EMPTY interrupts to avoid handling redundant interrupts. For doing a sinlge bus detection (i2cdetect -y -r 0) we can reduce ~97% interrupts (before ~12000 after ~400). Signed-off-by: Sheng Feng Signed-off-by: Yicong Yang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-hisi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c index 8c6c7075c765..1b7609a34f4a 100644 --- a/drivers/i2c/busses/i2c-hisi.c +++ b/drivers/i2c/busses/i2c-hisi.c @@ -316,6 +316,13 @@ static void hisi_i2c_xfer_msg(struct hisi_i2c_controller *ctlr) max_write == 0) break; } + + /* + * Disable the TX_EMPTY interrupt after finishing all the messages to + * avoid overwhelming the CPU. + */ + if (ctlr->msg_tx_idx == ctlr->msg_num) + hisi_i2c_disable_int(ctlr, HISI_I2C_INT_TX_EMPTY); } static irqreturn_t hisi_i2c_irq(int irq, void *context) -- cgit From d98263512684a47e81bcb72a5408958ecd1e60b0 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 13 Mar 2023 15:45:52 +0800 Subject: i2c: hisi: Only use the completion interrupt to finish the transfer The controller will always generate a completion interrupt when the transfer is finished normally or not. Currently we use either error or completion interrupt to finish, this may result the completion interrupt unhandled and corrupt the next transfer, especially at low speed mode. Since on error case, the error interrupt will come first then is the completion interrupt. So only use the completion interrupt to finish the whole transfer process. Fixes: d62fbdb99a85 ("i2c: add support for HiSilicon I2C controller") Reported-by: Sheng Feng Signed-off-by: Sheng Feng Signed-off-by: Yicong Yang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-hisi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c index 1b7609a34f4a..e067671b3ce2 100644 --- a/drivers/i2c/busses/i2c-hisi.c +++ b/drivers/i2c/busses/i2c-hisi.c @@ -348,7 +348,11 @@ static irqreturn_t hisi_i2c_irq(int irq, void *context) hisi_i2c_read_rx_fifo(ctlr); out: - if (int_stat & HISI_I2C_INT_TRANS_CPLT || ctlr->xfer_err) { + /* + * Only use TRANS_CPLT to indicate the completion. On error cases we'll + * get two interrupts, INT_ERR first then TRANS_CPLT. + */ + if (int_stat & HISI_I2C_INT_TRANS_CPLT) { hisi_i2c_disable_int(ctlr, HISI_I2C_INT_ALL); hisi_i2c_clear_int(ctlr, HISI_I2C_INT_ALL); complete(ctlr->completion); -- cgit From 92fbb6d1296f81f41f65effd7f5f8c0f74943d15 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Tue, 14 Mar 2023 16:54:21 +0000 Subject: i2c: xgene-slimpro: Fix out-of-bounds bug in xgene_slimpro_i2c_xfer() The data->block[0] variable comes from user and is a number between 0-255. Without proper check, the variable may be very large to cause an out-of-bounds when performing memcpy in slimpro_i2c_blkwr. Fix this bug by checking the value of writelen. Fixes: f6505fbabc42 ("i2c: add SLIMpro I2C device driver on APM X-Gene platform") Signed-off-by: Wei Chen Cc: stable@vger.kernel.org Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xgene-slimpro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index 63259b3ea5ab..3538d36368a9 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -308,6 +308,9 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip, u32 msg[3]; int rc; + if (writelen > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + memcpy(ctx->dma_buffer, data, writelen); paddr = dma_map_single(ctx->dev, ctx->dma_buffer, writelen, DMA_TO_DEVICE); -- cgit From 7f5ebf5dae42e710162f1c481ebcf28ab7b741c7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 15 Mar 2023 08:41:14 +0100 Subject: ravb: avoid PHY being resumed when interface is not up RAVB doesn't need mdiobus suspend/resume, that's why it sets 'mac_managed_pm'. However, setting it needs to be moved from init to probe, so mdiobus PM functions will really never be called (e.g. when the interface is not up yet during suspend/resume). Fixes: 4924c0cdce75 ("net: ravb: Fix PHY state warning splat during system resume") Suggested-by: Heiner Kallweit Signed-off-by: Wolfram Sang Reviewed-by: Michal Kubiak Reviewed-by: Sergey Shtylyov Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0f54849a3823..894e2690c643 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1455,8 +1455,6 @@ static int ravb_phy_init(struct net_device *ndev) phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); } - /* Indicate that the MAC is responsible for managing PHY PM */ - phydev->mac_managed_pm = true; phy_attached_info(phydev); return 0; @@ -2379,6 +2377,8 @@ static int ravb_mdio_init(struct ravb_private *priv) { struct platform_device *pdev = priv->pdev; struct device *dev = &pdev->dev; + struct phy_device *phydev; + struct device_node *pn; int error; /* Bitbang init */ @@ -2400,6 +2400,14 @@ static int ravb_mdio_init(struct ravb_private *priv) if (error) goto out_free_bus; + pn = of_parse_phandle(dev->of_node, "phy-handle", 0); + phydev = of_phy_find_device(pn); + if (phydev) { + phydev->mac_managed_pm = true; + put_device(&phydev->mdio.dev); + } + of_node_put(pn); + return 0; out_free_bus: -- cgit From c6be7136afb224a01d4cde2983ddebac8da98693 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 15 Mar 2023 08:41:15 +0100 Subject: sh_eth: avoid PHY being resumed when interface is not up SH_ETH doesn't need mdiobus suspend/resume, that's why it sets 'mac_managed_pm'. However, setting it needs to be moved from init to probe, so mdiobus PM functions will really never be called (e.g. when the interface is not up yet during suspend/resume). Fixes: 6a1dbfefdae4 ("net: sh_eth: Fix PHY state warning splat during system resume") Suggested-by: Heiner Kallweit Signed-off-by: Wolfram Sang Reviewed-by: Michal Kubiak Reviewed-by: Sergey Shtylyov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/sh_eth.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index ed17163d7811..d8ec729825be 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2029,8 +2029,6 @@ static int sh_eth_phy_init(struct net_device *ndev) if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) phy_set_max_speed(phydev, SPEED_100); - /* Indicate that the MAC is responsible for managing PHY PM */ - phydev->mac_managed_pm = true; phy_attached_info(phydev); return 0; @@ -3097,6 +3095,8 @@ static int sh_mdio_init(struct sh_eth_private *mdp, struct bb_info *bitbang; struct platform_device *pdev = mdp->pdev; struct device *dev = &mdp->pdev->dev; + struct phy_device *phydev; + struct device_node *pn; /* create bit control struct for PHY */ bitbang = devm_kzalloc(dev, sizeof(struct bb_info), GFP_KERNEL); @@ -3133,6 +3133,14 @@ static int sh_mdio_init(struct sh_eth_private *mdp, if (ret) goto out_free_bus; + pn = of_parse_phandle(dev->of_node, "phy-handle", 0); + phydev = of_phy_find_device(pn); + if (phydev) { + phydev->mac_managed_pm = true; + put_device(&phydev->mdio.dev); + } + of_node_put(pn); + return 0; out_free_bus: -- cgit From 8a2618e14f81604a9b6ad305d57e0c8da939cd65 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Mar 2023 14:40:09 +0200 Subject: ipv4: Fix incorrect table ID in IOCTL path Commit f96a3d74554d ("ipv4: Fix incorrect route flushing when source address is deleted") started to take the table ID field in the FIB info structure into account when determining if two structures are identical or not. This field is initialized using the 'fc_table' field in the route configuration structure, which is not set when adding a route via IOCTL. The above can result in user space being able to install two identical routes that only differ in the table ID field of their associated FIB info. Fix by initializing the table ID field in the route configuration structure in the IOCTL path. Before the fix: # ip route add default via 192.0.2.2 # route add default gw 192.0.2.2 # ip -4 r show default # default via 192.0.2.2 dev dummy10 # default via 192.0.2.2 dev dummy10 After the fix: # ip route add default via 192.0.2.2 # route add default gw 192.0.2.2 SIOCADDRT: File exists # ip -4 r show default default via 192.0.2.2 dev dummy10 Audited the code paths to ensure there are no other paths that do not properly initialize the route configuration structure when installing a route. Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Fixes: f96a3d74554d ("ipv4: Fix incorrect route flushing when source address is deleted") Reported-by: gaoxingwang Link: https://lore.kernel.org/netdev/20230314144159.2354729-1-gaoxingwang1@huawei.com/ Tested-by: gaoxingwang Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20230315124009.4015212-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b5736ef16ed2..390f4be7f7be 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -576,6 +576,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, cfg->fc_scope = RT_SCOPE_UNIVERSE; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + if (cmd == SIOCDELRT) return 0; -- cgit From 43ffe6caccc7a1bb9d7442fbab521efbf6c1378c Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Thu, 16 Mar 2023 12:05:40 +0100 Subject: net: usb: smsc75xx: Move packet length check to prevent kernel panic in skb_pull Packet length check needs to be located after size and align_count calculation to prevent kernel panic in skb_pull() in case rx_cmd_a & RX_CMD_A_RED evaluates to true. Fixes: d8b228318935 ("net: usb: smsc75xx: Limit packet length to skb->len") Signed-off-by: Szymon Heidrich Link: https://lore.kernel.org/r/20230316110540.77531-1-szymon.heidrich@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/smsc75xx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index db34f8d1d605..5d6454fedb3f 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -2200,6 +2200,13 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) size = (rx_cmd_a & RX_CMD_A_LEN) - RXW_PADDING; align_count = (4 - ((size + RXW_PADDING) % 4)) % 4; + if (unlikely(size > skb->len)) { + netif_dbg(dev, rx_err, dev->net, + "size err rx_cmd_a=0x%08x\n", + rx_cmd_a); + return 0; + } + if (unlikely(rx_cmd_a & RX_CMD_A_RED)) { netif_dbg(dev, rx_err, dev->net, "Error rx_cmd_a=0x%08x\n", rx_cmd_a); @@ -2212,8 +2219,7 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) dev->net->stats.rx_frame_errors++; } else { /* MAX_SINGLE_PACKET_SIZE + 4(CRC) + 2(COE) + 4(Vlan) */ - if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12) || - size > skb->len)) { + if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12))) { netif_dbg(dev, rx_err, dev->net, "size err rx_cmd_a=0x%08x\n", rx_cmd_a); -- cgit From 37d010399f7552add2b68e2b347901c83562dab8 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 15 Mar 2023 13:55:38 +0100 Subject: net: atlantic: Fix crash when XDP is enabled but no program is loaded MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The aq_xdp_run_prog() function falls back to the XDP_ABORTED action handler (using a goto) if the operations for any of the other actions fail. The XDP_ABORTED handler in turn calls the bpf_warn_invalid_xdp_action() tracepoint. However, the function also jumps into the XDP_PASS helper if no XDP program is loaded on the device, which means the XDP_ABORTED handler can be run with a NULL program pointer. This results in a NULL pointer deref because the tracepoint dereferences the 'prog' pointer passed to it. This situation can happen in multiple ways: - If a packet arrives between the removal of the program from the interface and the static_branch_dec() in aq_xdp_setup() - If there are multiple devices using the same driver in the system and one of them has an XDP program loaded and the other does not. Fix this by refactoring the aq_xdp_run_prog() function to remove the 'goto pass' handling if there is no XDP program loaded. Instead, factor out the skb building in a separate small helper function. Fixes: 26efaef759a1 ("net: atlantic: Implement xdp data plane") Reported-by: Freysteinn Alfredsson Tested-by: Freysteinn Alfredsson Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20230315125539.103319-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 28 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 1e8d902e1c8e..7f933175cbda 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -412,6 +412,25 @@ int aq_xdp_xmit(struct net_device *dev, int num_frames, return num_frames - drop; } +static struct sk_buff *aq_xdp_build_skb(struct xdp_buff *xdp, + struct net_device *dev, + struct aq_ring_buff_s *buff) +{ + struct xdp_frame *xdpf; + struct sk_buff *skb; + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) + return NULL; + + skb = xdp_build_skb_from_frame(xdpf, dev); + if (!skb) + return NULL; + + aq_get_rxpages_xdp(buff, xdp); + return skb; +} + static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic, struct xdp_buff *xdp, struct aq_ring_s *rx_ring, @@ -431,7 +450,7 @@ static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic, prog = READ_ONCE(rx_ring->xdp_prog); if (!prog) - goto pass; + return aq_xdp_build_skb(xdp, aq_nic->ndev, buff); prefetchw(xdp->data_hard_start); /* xdp_frame write */ @@ -442,17 +461,12 @@ static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic, act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_PASS: -pass: - xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) - goto out_aborted; - skb = xdp_build_skb_from_frame(xdpf, aq_nic->ndev); + skb = aq_xdp_build_skb(xdp, aq_nic->ndev, buff); if (!skb) goto out_aborted; u64_stats_update_begin(&rx_ring->stats.rx.syncp); ++rx_ring->stats.rx.xdp_pass; u64_stats_update_end(&rx_ring->stats.rx.syncp); - aq_get_rxpages_xdp(buff, xdp); return skb; case XDP_TX: xdpf = xdp_convert_buff_to_frame(xdp); -- cgit From 3d87debb8ed2649608ff432699e7c961c0c6f03b Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 15 Mar 2023 14:14:35 +0100 Subject: net/iucv: Fix size of interrupt data iucv_irq_data needs to be 4 bytes larger. These bytes are not used by the iucv module, but written by the z/VM hypervisor in case a CPU is deconfigured. Reported as: BUG dma-kmalloc-64 (Not tainted): kmalloc Redzone overwritten ----------------------------------------------------------------------------- 0x0000000000400564-0x0000000000400567 @offset=1380. First byte 0x80 instead of 0xcc Allocated in iucv_cpu_prepare+0x44/0xd0 age=167839 cpu=2 pid=1 __kmem_cache_alloc_node+0x166/0x450 kmalloc_node_trace+0x3a/0x70 iucv_cpu_prepare+0x44/0xd0 cpuhp_invoke_callback+0x156/0x2f0 cpuhp_issue_call+0xf0/0x298 __cpuhp_setup_state_cpuslocked+0x136/0x338 __cpuhp_setup_state+0xf4/0x288 iucv_init+0xf4/0x280 do_one_initcall+0x78/0x390 do_initcalls+0x11a/0x140 kernel_init_freeable+0x25e/0x2a0 kernel_init+0x2e/0x170 __ret_from_fork+0x3c/0x58 ret_from_fork+0xa/0x40 Freed in iucv_init+0x92/0x280 age=167839 cpu=2 pid=1 __kmem_cache_free+0x308/0x358 iucv_init+0x92/0x280 do_one_initcall+0x78/0x390 do_initcalls+0x11a/0x140 kernel_init_freeable+0x25e/0x2a0 kernel_init+0x2e/0x170 __ret_from_fork+0x3c/0x58 ret_from_fork+0xa/0x40 Slab 0x0000037200010000 objects=32 used=30 fp=0x0000000000400640 flags=0x1ffff00000010200(slab|head|node=0|zone=0| Object 0x0000000000400540 @offset=1344 fp=0x0000000000000000 Redzone 0000000000400500: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................ Redzone 0000000000400510: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................ Redzone 0000000000400520: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................ Redzone 0000000000400530: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................ Object 0000000000400540: 00 01 00 03 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object 0000000000400550: f3 86 81 f2 f4 82 f8 82 f0 f0 f0 f0 f0 f0 f0 f2 ................ Object 0000000000400560: 00 00 00 00 80 00 00 00 cc cc cc cc cc cc cc cc ................ Object 0000000000400570: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ................ Redzone 0000000000400580: cc cc cc cc cc cc cc cc ........ Padding 00000000004005d4: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Padding 00000000004005e4: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Padding 00000000004005f4: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZ CPU: 6 PID: 121030 Comm: 116-pai-crypto. Not tainted 6.3.0-20230221.rc0.git4.99b8246b2d71.300.fc37.s390x+debug #1 Hardware name: IBM 3931 A01 704 (z/VM 7.3.0) Call Trace: [<000000032aa034ec>] dump_stack_lvl+0xac/0x100 [<0000000329f5a6cc>] check_bytes_and_report+0x104/0x140 [<0000000329f5aa78>] check_object+0x370/0x3c0 [<0000000329f5ede6>] free_debug_processing+0x15e/0x348 [<0000000329f5f06a>] free_to_partial_list+0x9a/0x2f0 [<0000000329f5f4a4>] __slab_free+0x1e4/0x3a8 [<0000000329f61768>] __kmem_cache_free+0x308/0x358 [<000000032a91465c>] iucv_cpu_dead+0x6c/0x88 [<0000000329c2fc66>] cpuhp_invoke_callback+0x156/0x2f0 [<000000032aa062da>] _cpu_down.constprop.0+0x22a/0x5e0 [<0000000329c3243e>] cpu_device_down+0x4e/0x78 [<000000032a61dee0>] device_offline+0xc8/0x118 [<000000032a61e048>] online_store+0x60/0xe0 [<000000032a08b6b0>] kernfs_fop_write_iter+0x150/0x1e8 [<0000000329fab65c>] vfs_write+0x174/0x360 [<0000000329fab9fc>] ksys_write+0x74/0x100 [<000000032aa03a5a>] __do_syscall+0x1da/0x208 [<000000032aa177b2>] system_call+0x82/0xb0 INFO: lockdep is turned off. FIX dma-kmalloc-64: Restoring kmalloc Redzone 0x0000000000400564-0x0000000000400567=0xcc FIX dma-kmalloc-64: Object at 0x0000000000400540 not freed Fixes: 2356f4cb1911 ("[S390]: Rewrite of the IUCV base code, part 2") Signed-off-by: Alexandra Winter Link: https://lore.kernel.org/r/20230315131435.4113889-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/iucv/iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index eb0295d90039..fc3fddeb6f36 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -83,7 +83,7 @@ struct iucv_irq_data { u16 ippathid; u8 ipflags1; u8 iptype; - u32 res2[8]; + u32 res2[9]; }; struct iucv_irq_list { -- cgit From f38373345c65529639a01fba3675eb8cb4c579c3 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 15 Mar 2023 14:41:17 +0100 Subject: i825xx: sni_82596: use eth_hw_addr_set() netdev->dev_addr is now const, we can't write to it directly. Copy scrambled mac address octects into an array then eth_hw_addr_set(). Fixes: adeef3e32146 ("net: constify netdev->dev_addr") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Michal Kubiak Link: https://lore.kernel.org/r/20230315134117.79511-1-tsbogend@alpha.franken.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/i825xx/sni_82596.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index daec9ce04531..54bb4d9a0d1e 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -78,6 +78,7 @@ static int sni_82596_probe(struct platform_device *dev) void __iomem *mpu_addr; void __iomem *ca_addr; u8 __iomem *eth_addr; + u8 mac[ETH_ALEN]; res = platform_get_resource(dev, IORESOURCE_MEM, 0); ca = platform_get_resource(dev, IORESOURCE_MEM, 1); @@ -109,12 +110,13 @@ static int sni_82596_probe(struct platform_device *dev) goto probe_failed; /* someone seems to like messed up stuff */ - netdevice->dev_addr[0] = readb(eth_addr + 0x0b); - netdevice->dev_addr[1] = readb(eth_addr + 0x0a); - netdevice->dev_addr[2] = readb(eth_addr + 0x09); - netdevice->dev_addr[3] = readb(eth_addr + 0x08); - netdevice->dev_addr[4] = readb(eth_addr + 0x07); - netdevice->dev_addr[5] = readb(eth_addr + 0x06); + mac[0] = readb(eth_addr + 0x0b); + mac[1] = readb(eth_addr + 0x0a); + mac[2] = readb(eth_addr + 0x09); + mac[3] = readb(eth_addr + 0x08); + mac[4] = readb(eth_addr + 0x07); + mac[5] = readb(eth_addr + 0x06); + eth_hw_addr_set(netdevice, mac); iounmap(eth_addr); if (netdevice->irq < 0) { -- cgit From 24994513ad13ff2c47ba91d2b5df82c3d496c370 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 16 Mar 2023 00:53:53 +0800 Subject: selftests: net: devlink_port_split.py: skip test if no suitable device available The `devlink -j port show` command output may not contain the "flavour" key, an example from Ubuntu 22.10 s390x LPAR(5.19.0-37-generic), with mlx4 driver and iproute2-5.15.0: {"port":{"pci/0001:00:00.0/1":{"type":"eth","netdev":"ens301"}, "pci/0001:00:00.0/2":{"type":"eth","netdev":"ens301d1"}, "pci/0002:00:00.0/1":{"type":"eth","netdev":"ens317"}, "pci/0002:00:00.0/2":{"type":"eth","netdev":"ens317d1"}}} This will cause a KeyError exception. Create a validate_devlink_output() to check for this "flavour" from devlink command output to avoid this KeyError exception. Also let it handle the check for `devlink -j dev show` output in main(). Apart from this, if the test was not started because the max lanes of the designated device is 0. The script will still return 0 and thus causing a false-negative test result. Use a found_max_lanes flag to determine if these tests were skipped due to this reason and return KSFT_SKIP to make it more clear. Link: https://bugs.launchpad.net/bugs/1937133 Fixes: f3348a82e727 ("selftests: net: Add port split test") Signed-off-by: Po-Hsu Lin Link: https://lore.kernel.org/r/20230315165353.229590-1-po-hsu.lin@canonical.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/devlink_port_split.py | 36 +++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py index 2b5d6ff87373..2d84c7a0be6b 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -59,6 +59,8 @@ class devlink_ports(object): assert stderr == "" ports = json.loads(stdout)['port'] + validate_devlink_output(ports, 'flavour') + for port in ports: if dev in port: if ports[port]['flavour'] == 'physical': @@ -220,6 +222,27 @@ def split_splittable_port(port, k, lanes, dev): unsplit(port.bus_info) +def validate_devlink_output(devlink_data, target_property=None): + """ + Determine if test should be skipped by checking: + 1. devlink_data contains values + 2. The target_property exist in devlink_data + """ + skip_reason = None + if any(devlink_data.values()): + if target_property: + skip_reason = "{} not found in devlink output, test skipped".format(target_property) + for key in devlink_data: + if target_property in devlink_data[key]: + skip_reason = None + else: + skip_reason = 'devlink output is empty, test skipped' + + if skip_reason: + print(skip_reason) + sys.exit(KSFT_SKIP) + + def make_parser(): parser = argparse.ArgumentParser(description='A test for port splitting.') parser.add_argument('--dev', @@ -240,12 +263,9 @@ def main(cmdline=None): stdout, stderr = run_command(cmd) assert stderr == "" + validate_devlink_output(json.loads(stdout)) devs = json.loads(stdout)['dev'] - if devs: - dev = list(devs.keys())[0] - else: - print("no devlink device was found, test skipped") - sys.exit(KSFT_SKIP) + dev = list(devs.keys())[0] cmd = "devlink dev show %s" % dev stdout, stderr = run_command(cmd) @@ -255,6 +275,7 @@ def main(cmdline=None): ports = devlink_ports(dev) + found_max_lanes = False for port in ports.if_names: max_lanes = get_max_lanes(port.name) @@ -277,6 +298,11 @@ def main(cmdline=None): split_splittable_port(port, lane, max_lanes, dev) lane //= 2 + found_max_lanes = True + + if not found_max_lanes: + print(f"Test not started, no port of device {dev} reports max_lanes") + sys.exit(KSFT_SKIP) if __name__ == "__main__": -- cgit From a204b490595de71016b2360a1886ec8c12d0afac Mon Sep 17 00:00:00 2001 From: Joel Selvaraj Date: Sun, 12 Mar 2023 23:14:02 -0500 Subject: scsi: core: Add BLIST_SKIP_VPD_PAGES for SKhynix H28U74301AMR Xiaomi Poco F1 (qcom/sdm845-xiaomi-beryllium*.dts) comes with a SKhynix H28U74301AMR UFS. The sd_read_cpr() operation leads to a 120 second timeout, making the device bootup very slow: [ 121.457736] sd 0:0:0:1: [sdb] tag#23 timing out command, waited 120s Setting the BLIST_SKIP_VPD_PAGES allows the device to skip the failing sd_read_cpr operation and boot normally. Signed-off-by: Joel Selvaraj Link: https://lore.kernel.org/r/20230313041402.39330-1-joelselvaraj.oss@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index bc9d280417f6..3fcaf10a9dfe 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -234,6 +234,7 @@ static struct { {"SGI", "RAID5", "*", BLIST_SPARSELUN}, {"SGI", "TP9100", "*", BLIST_REPORTLUN2}, {"SGI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"SKhynix", "H28U74301AMR", NULL, BLIST_SKIP_VPD_PAGES}, {"IBM", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SUN", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"DELL", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, -- cgit From 0367076b0817d5c75dfb83001ce7ce5c64d803a9 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Sun, 12 Mar 2023 21:37:10 -0700 Subject: scsi: qla2xxx: Perform lockless command completion in abort path While adding and removing the controller, the following call trace was observed: WARNING: CPU: 3 PID: 623596 at kernel/dma/mapping.c:532 dma_free_attrs+0x33/0x50 CPU: 3 PID: 623596 Comm: sh Kdump: loaded Not tainted 5.14.0-96.el9.x86_64 #1 RIP: 0010:dma_free_attrs+0x33/0x50 Call Trace: qla2x00_async_sns_sp_done+0x107/0x1b0 [qla2xxx] qla2x00_abort_srb+0x8e/0x250 [qla2xxx] ? ql_dbg+0x70/0x100 [qla2xxx] __qla2x00_abort_all_cmds+0x108/0x190 [qla2xxx] qla2x00_abort_all_cmds+0x24/0x70 [qla2xxx] qla2x00_abort_isp_cleanup+0x305/0x3e0 [qla2xxx] qla2x00_remove_one+0x364/0x400 [qla2xxx] pci_device_remove+0x36/0xa0 __device_release_driver+0x17a/0x230 device_release_driver+0x24/0x30 pci_stop_bus_device+0x68/0x90 pci_stop_and_remove_bus_device_locked+0x16/0x30 remove_store+0x75/0x90 kernfs_fop_write_iter+0x11c/0x1b0 new_sync_write+0x11f/0x1b0 vfs_write+0x1eb/0x280 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x80 ? do_user_addr_fault+0x1d8/0x680 ? do_syscall_64+0x69/0x80 ? exc_page_fault+0x62/0x140 ? asm_exc_page_fault+0x8/0x30 entry_SYSCALL_64_after_hwframe+0x44/0xae The command was completed in the abort path during driver unload with a lock held, causing the warning in abort path. Hence complete the command without any lock held. Reported-by: Lin Li Tested-by: Lin Li Cc: stable@vger.kernel.org Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230313043711.13500-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Reviewed-by: John Meneghini Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 80c4ee9df2a4..bee1b8a82020 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1865,6 +1865,17 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { + /* + * perform lockless completion during driver unload + */ + if (qla2x00_chip_is_down(vha)) { + req->outstanding_cmds[cnt] = NULL; + spin_unlock_irqrestore(qp->qp_lock_ptr, flags); + sp->done(sp, res); + spin_lock_irqsave(qp->qp_lock_ptr, flags); + continue; + } + switch (sp->cmd_type) { case TYPE_SRB: qla2x00_abort_srb(qp, sp, res, &flags); -- cgit From d3affdeb400f3adc925bd996f3839481f5291839 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Sun, 12 Mar 2023 21:37:11 -0700 Subject: scsi: qla2xxx: Synchronize the IOCB count to be in order A system hang was observed with the following call trace: BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 15 PID: 86747 Comm: nvme Kdump: loaded Not tainted 6.2.0+ #1 Hardware name: Dell Inc. PowerEdge R6515/04F3CJ, BIOS 2.7.3 03/31/2022 RIP: 0010:__wake_up_common+0x55/0x190 Code: 41 f6 01 04 0f 85 b2 00 00 00 48 8b 43 08 4c 8d 40 e8 48 8d 43 08 48 89 04 24 48 89 c6\ 49 8d 40 18 48 39 c6 0f 84 e9 00 00 00 <49> 8b 40 18 89 6c 24 14 31 ed 4c 8d 60 e8 41 8b 18 f6 c3 04 75 5d RSP: 0018:ffffb05a82afbba0 EFLAGS: 00010082 RAX: 0000000000000000 RBX: ffff8f9b83a00018 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff8f9b83a00020 RDI: ffff8f9b83a00018 RBP: 0000000000000001 R08: ffffffffffffffe8 R09: ffffb05a82afbbf8 R10: 70735f7472617473 R11: 5f30307832616c71 R12: 0000000000000001 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f815cf4c740(0000) GS:ffff8f9eeed80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000010633a000 CR4: 0000000000350ee0 Call Trace: __wake_up_common_lock+0x83/0xd0 qla_nvme_ls_req+0x21b/0x2b0 [qla2xxx] __nvme_fc_send_ls_req+0x1b5/0x350 [nvme_fc] nvme_fc_xmt_disconnect_assoc+0xca/0x110 [nvme_fc] nvme_fc_delete_association+0x1bf/0x220 [nvme_fc] ? nvme_remove_namespaces+0x9f/0x140 [nvme_core] nvme_do_delete_ctrl+0x5b/0xa0 [nvme_core] nvme_sysfs_delete+0x5f/0x70 [nvme_core] kernfs_fop_write_iter+0x12b/0x1c0 vfs_write+0x2a3/0x3b0 ksys_write+0x5f/0xe0 do_syscall_64+0x5c/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 ? exit_to_user_mode_loop+0xd0/0x130 ? exit_to_user_mode_prepare+0xec/0x100 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 ? syscall_exit_to_user_mode+0x12/0x30 ? do_syscall_64+0x69/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f815cd3eb97 The IOCB counts are out of order and that would block any commands from going out and subsequently hang the system. Synchronize the IOCB count to be in correct order. Fixes: 5f63a163ed2f ("scsi: qla2xxx: Fix exchange oversubscription for management commands") Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230313043711.13500-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Reviewed-by: John Meneghini Tested-by: Lin Li Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 030625ebb4e6..71feda2cdb63 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1900,6 +1900,8 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, } req->outstanding_cmds[index] = NULL; + + qla_put_fw_resources(sp->qpair, &sp->iores); return sp; } @@ -3112,7 +3114,6 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt, } bsg_reply->reply_payload_rcv_len = 0; - qla_put_fw_resources(sp->qpair, &sp->iores); done: /* Return the vendor specific reply to API */ bsg_reply->reply_data.vendor_reply.vendor_rsp[0] = rval; -- cgit From a13faca032acbf2699293587085293bdfaafc8ae Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 15 Mar 2023 14:21:54 +0800 Subject: scsi: scsi_dh_alua: Fix memleak for 'qdata' in alua_activate() If alua_rtpg_queue() failed from alua_activate(), then 'qdata' is not freed, which will cause following memleak: unreferenced object 0xffff88810b2c6980 (size 32): comm "kworker/u16:2", pid 635322, jiffies 4355801099 (age 1216426.076s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 40 39 24 c1 ff ff ff ff 00 f8 ea 0a 81 88 ff ff @9$............. backtrace: [<0000000098f3a26d>] alua_activate+0xb0/0x320 [<000000003b529641>] scsi_dh_activate+0xb2/0x140 [<000000007b296db3>] activate_path_work+0xc6/0xe0 [dm_multipath] [<000000007adc9ace>] process_one_work+0x3c5/0x730 [<00000000c457a985>] worker_thread+0x93/0x650 [<00000000cb80e628>] kthread+0x1ba/0x210 [<00000000a1e61077>] ret_from_fork+0x22/0x30 Fix the problem by freeing 'qdata' in error path. Fixes: 625fe857e4fa ("scsi: scsi_dh_alua: Check scsi_device_get() return value") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20230315062154.668812-1-yukuai1@huaweicloud.com Reviewed-by: Benjamin Block Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 362fa631f39b..a226dc1b65d7 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -1145,10 +1145,12 @@ static int alua_activate(struct scsi_device *sdev, rcu_read_unlock(); mutex_unlock(&h->init_mutex); - if (alua_rtpg_queue(pg, sdev, qdata, true)) + if (alua_rtpg_queue(pg, sdev, qdata, true)) { fn = NULL; - else + } else { + kfree(qdata); err = SCSI_DH_DEV_OFFLINED; + } kref_put(&pg->kref, release_port_group); out: if (fn) -- cgit From 470efd68a4653d9819d391489886432cd31bcd0b Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Wed, 15 Mar 2023 22:46:18 +0300 Subject: qed/qed_mng_tlv: correctly zero out ->min instead of ->hour This fixes an issue where ->hour would erroneously get zeroed out instead of ->min because of a bad copy paste. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: f240b6882211 ("qed: Add support for processing fcoe tlv request.") Signed-off-by: Daniil Tatianin Link: https://lore.kernel.org/r/20230315194618.579286-1-d-tatianin@yandex-team.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c index 6190adf965bc..f55eed092f25 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c @@ -422,7 +422,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time, if (p_time->hour > 23) p_time->hour = 0; if (p_time->min > 59) - p_time->hour = 0; + p_time->min = 0; if (p_time->msec > 999) p_time->msec = 0; if (p_time->usec > 999) -- cgit From 1b0120e4db0bf2838d1ce741195ce4b7cc100b91 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 15 Mar 2023 21:25:17 +0100 Subject: hsr: ratelimit only when errors are printed Recently, when automatically merging -net and net-next in MPTCP devel tree, our CI reported [1] a conflict in hsr, the same as the one reported by Stephen in netdev [2]. When looking at the conflict, I noticed it is in fact the v1 [3] that has been applied in -net and the v2 [4] in net-next. Maybe the v1 was applied by accident. As mentioned by Jakub Kicinski [5], the new condition makes more sense before the net_ratelimit(), not to update net_ratelimit's state which is unnecessary if we're not going to print either way. Here, this modification applies the v2 but in -net. Link: https://github.com/multipath-tcp/mptcp_net-next/actions/runs/4423171069 [1] Link: https://lore.kernel.org/netdev/20230315100914.53fc1760@canb.auug.org.au/ [2] Link: https://lore.kernel.org/netdev/20230307133229.127442-1-koverskeid@gmail.com/ [3] Link: https://lore.kernel.org/netdev/20230309092302.179586-1-koverskeid@gmail.com/ [4] Link: https://lore.kernel.org/netdev/20230308232001.2fb62013@kernel.org/ [5] Fixes: 28e8cabe80f3 ("net: hsr: Don't log netdev_err message on unknown prp dst node") Signed-off-by: Matthieu Baerts Reviewed-by: Steen Hegelund Link: https://lore.kernel.org/r/20230315-net-20230315-hsr_framereg-ratelimit-v1-1-61d2ef176d11@tessares.net Signed-off-by: Jakub Kicinski --- net/hsr/hsr_framereg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 865eda39d601..b77f1189d19d 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, node_dst = find_node_by_addr_A(&port->hsr->node_db, eth_hdr(skb)->h_dest); if (!node_dst) { - if (net_ratelimit() && port->hsr->prot_version != PRP_V1) + if (port->hsr->prot_version != PRP_V1 && net_ratelimit()) netdev_err(skb->dev, "%s: Unknown node\n", __func__); return; } -- cgit From 054abb515f346b8f30a0a11953d9f786d3e76813 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 15 Mar 2023 16:03:49 -0700 Subject: tools: ynl: make definitions optional again definitions are optional, commit in question breaks cli for ethtool. Fixes: 6517a60b0307 ("tools: ynl: move the enum classes to shared code") Reviewed-by: Chuck Lever Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/nlspec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 960a356e8225..e01a72d06638 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -387,7 +387,8 @@ class SpecFamily(SpecElement): def resolve(self): self.resolve_up(super()) - for elem in self.yaml['definitions']: + definitions = self.yaml.get('definitions', []) + for elem in definitions: if elem['type'] == 'enum' or elem['type'] == 'flags': self.consts[elem['name']] = self.new_enum(elem) else: -- cgit From 4e16b6a748df52800c90f3ab181aef8129bd332f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 15 Mar 2023 16:03:50 -0700 Subject: ynl: broaden the license even more I relicensed Netlink spec code to GPL-2.0 OR BSD-3-Clause but we still put a slightly different license on the uAPI header than the rest of the code. Use the Linux-syscall-note on all the specs and all generated code. It's moot for kernel code, but should not hurt. This way the licenses match everywhere. Cc: Chuck Lever Fixes: 37d9df224d1e ("ynl: re-license uniformly under GPL-2.0 OR BSD-3-Clause") Reviewed-by: Chuck Lever Signed-off-by: Jakub Kicinski --- Documentation/netlink/genetlink-c.yaml | 2 +- Documentation/netlink/genetlink-legacy.yaml | 2 +- Documentation/netlink/genetlink.yaml | 2 +- Documentation/netlink/specs/ethtool.yaml | 2 +- Documentation/netlink/specs/fou.yaml | 2 +- Documentation/netlink/specs/netdev.yaml | 2 +- Documentation/userspace-api/netlink/specs.rst | 3 ++- include/uapi/linux/fou.h | 2 +- include/uapi/linux/netdev.h | 2 +- net/core/netdev-genl-gen.c | 2 +- net/core/netdev-genl-gen.h | 2 +- net/ipv4/fou_nl.c | 2 +- net/ipv4/fou_nl.h | 2 +- tools/include/uapi/linux/netdev.h | 2 +- tools/net/ynl/ynl-gen-c.py | 8 ++++---- 15 files changed, 19 insertions(+), 18 deletions(-) diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index f082a5ad7cf1..5c3642b3f802 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-c.yaml# diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index c6b8c77f7d12..5e98c6d2b9aa 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml# diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index b2d56ab9e615..d35dcd6f8d82 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) %YAML 1.2 --- $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml# diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 18ecb7d90cbe..4727c067e2ba 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) name: ethtool diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index cff104288723..3e13826a3fdf 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) name: fou diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 753e5914a8b7..b99e7ffef7a1 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) name: netdev diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index 2122e0c4a399..a22442ba1d30 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -24,7 +24,8 @@ YAML specifications can be found under ``Documentation/netlink/specs/`` This document describes details of the schema. See :doc:`intro-specs` for a practical starting guide. -All specs must be licensed under ``GPL-2.0-only OR BSD-3-Clause`` +All specs must be licensed under +``((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)`` to allow for easy adoption in user space code. Compatibility levels diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index 5041c3598493..b5cd3e7b3775 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN uapi header */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index ed134fbdfd32..639524b59930 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 9e10802587fc..3abab70d66dd 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel source */ diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 2c5fc7d1e8a7..74d74fc23167 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN kernel header */ diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c index 5c14fe030eda..6c37c4f98cca 100644 --- a/net/ipv4/fou_nl.c +++ b/net/ipv4/fou_nl.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel source */ diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h index 58b1e1ed4b3b..dbd0780a5d34 100644 --- a/net/ipv4/fou_nl.h +++ b/net/ipv4/fou_nl.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/fou.yaml */ /* YNL-GEN kernel header */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index ed134fbdfd32..639524b59930 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index d47376f19de7..3b4d03a50fc1 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) import argparse import collections @@ -2068,12 +2068,12 @@ def main(): _, spec_kernel = find_kernel_root(args.spec) if args.mode == 'uapi': - cw.p('/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */') + cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */') else: if args.header: - cw.p('/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */') + cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */') else: - cw.p('// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause') + cw.p('// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)') cw.p("/* Do not edit directly, auto-generated from: */") cw.p(f"/*\t{spec_kernel} */") cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */") -- cgit From cfab77c0b54583816faac5f9abdf588c13895a9d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 15 Mar 2023 16:03:51 -0700 Subject: ynl: make the tooling check the license The (only recently documented) expectation is that all specs are under a certain license, but we don't actually enforce it. What's worse we then go ahead and assume the license was right, outputting the expected license into generated files. Fixes: 37d9df224d1e ("ynl: re-license uniformly under GPL-2.0 OR BSD-3-Clause") Reviewed-by: Chuck Lever Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/nlspec.py | 8 ++++++++ tools/net/ynl/ynl-gen-c.py | 13 +++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index e01a72d06638..d04450c2a44a 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -274,6 +274,7 @@ class SpecFamily(SpecElement): Attributes: proto protocol type (e.g. genetlink) + license spec license (loaded from an SPDX tag on the spec) attr_sets dict of attribute sets msgs dict of all messages (index by name) @@ -283,6 +284,13 @@ class SpecFamily(SpecElement): """ def __init__(self, spec_path, schema_path=None): with open(spec_path, "r") as stream: + prefix = '# SPDX-License-Identifier: ' + first = stream.readline().strip() + if not first.startswith(prefix): + raise Exception('SPDX license tag required in the spec') + self.license = first[len(prefix):] + + stream.seek(0) spec = yaml.safe_load(stream) self._resolution_list = [] diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 3b4d03a50fc1..c16671a02621 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2059,6 +2059,10 @@ def main(): try: parsed = Family(args.spec) + if parsed.license != '((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)': + print('Spec license:', parsed.license) + print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)') + os.sys.exit(1) except yaml.YAMLError as exc: print(exc) os.sys.exit(1) @@ -2067,13 +2071,10 @@ def main(): cw = CodeWriter(BaseNlLib(), out_file) _, spec_kernel = find_kernel_root(args.spec) - if args.mode == 'uapi': - cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */') + if args.mode == 'uapi' or args.header: + cw.p(f'/* SPDX-License-Identifier: {parsed.license} */') else: - if args.header: - cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */') - else: - cw.p('// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)') + cw.p(f'// SPDX-License-Identifier: {parsed.license}') cw.p("/* Do not edit directly, auto-generated from: */") cw.p(f"/*\t{spec_kernel} */") cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */") -- cgit From 5ae06327a3a5bad4ee246d81df203b1b00a7b390 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 16 Mar 2023 01:19:16 +0200 Subject: net: dsa: microchip: fix RGMII delay configuration on KSZ8765/KSZ8794/KSZ8795 The blamed commit has replaced a ksz_write8() call to address REG_PORT_5_CTRL_6 (0x56) with a ksz_set_xmii() -> ksz_pwrite8() call to regs[P_XMII_CTRL_1], which is also defined as 0x56 for ksz8795_regs[]. The trouble is that, when compared to ksz_write8(), ksz_pwrite8() also adjusts the register offset with the port base address. So in reality, ksz_pwrite8(offset=0x56) accesses register 0x56 + 0x50 = 0xa6, which in this switch appears to be unmapped, and the RGMII delay configuration on the CPU port does nothing. So if the switch wasn't fine with the RGMII delay configuration done through pin strapping and relied on Linux to apply a different one in order to pass traffic, this is now broken. Using the offset translation logic imposed by ksz_pwrite8(), the correct value for regs[P_XMII_CTRL_1] should have been 0x6 on ksz8795_regs[], in order to really end up accessing register 0x56. Static code analysis shows that, despite there being multiple other accesses to regs[P_XMII_CTRL_1] in this driver, the only code path that is applicable to ksz8795_regs[] and ksz8_dev_ops is ksz_set_xmii(). Therefore, the problem is isolated to RGMII delays. In its current form, ksz8795_regs[] contains the same value for P_XMII_CTRL_0 and for P_XMII_CTRL_1, and this raises valid suspicions that writes made by the driver to regs[P_XMII_CTRL_0] might overwrite writes made to regs[P_XMII_CTRL_1] or vice versa. Again, static analysis shows that the only accesses to P_XMII_CTRL_0 from the driver are made from code paths which are not reachable with ksz8_dev_ops. So the accesses made by ksz_set_xmii() are safe for this switch family. [ vladimiroltean: rewrote commit message ] Fixes: c476bede4b0f ("net: dsa: microchip: ksz8795: use common xmii function") Signed-off-by: Marek Vasut Signed-off-by: Vladimir Oltean Acked-by: Arun Ramadoss Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230315231916.2998480-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 729b36eeb2c4..7fc2155d93d6 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -319,7 +319,7 @@ static const u16 ksz8795_regs[] = { [S_BROADCAST_CTRL] = 0x06, [S_MULTICAST_CTRL] = 0x04, [P_XMII_CTRL_0] = 0x06, - [P_XMII_CTRL_1] = 0x56, + [P_XMII_CTRL_1] = 0x06, }; static const u32 ksz8795_masks[] = { -- cgit From 8de2bd02439eb839a452a853c1004c2c45ff6fef Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 16 Mar 2023 11:37:52 +0800 Subject: Revert "net/sched: act_api: move TCA_EXT_WARN_MSG to the correct hierarchy" This reverts commit 923b2e30dc9cd05931da0f64e2e23d040865c035. This is not a correct fix as TCA_EXT_WARN_MSG is not a hierarchy to TCA_ACT_TAB. I didn't notice the TC actions use different enum when adding TCA_EXT_WARN_MSG. To fix the difference I will add a new WARN enum in TCA_ROOT_MAX as Jamal suggested. Signed-off-by: Hangbin Liu Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- net/sched/act_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 34c508675041..fce522886099 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1596,12 +1596,12 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], if (tcf_action_dump(skb, actions, bind, ref, false) < 0) goto out_nlmsg_trim; + nla_nest_end(skb, nest); + if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; - nla_nest_end(skb, nest); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -- cgit From 2f59823fe696caa844249a90bb3f9aeda69cfe5c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 16 Mar 2023 11:37:53 +0800 Subject: net/sched: act_api: add specific EXT_WARN_MSG for tc action In my previous commit 0349b8779cc9 ("sched: add new attr TCA_EXT_WARN_MSG to report tc extact message") I didn't notice the tc action use different enum with filter. So we can't use TCA_EXT_WARN_MSG directly for tc action. Let's add a TCA_ROOT_EXT_WARN_MSG for tc action specifically and put this param before going to the TCA_ACT_TAB nest. Fixes: 0349b8779cc9 ("sched: add new attr TCA_EXT_WARN_MSG to report tc extact message") Signed-off-by: Hangbin Liu Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski --- include/uapi/linux/rtnetlink.h | 1 + net/sched/act_api.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 25a0af57dd5e..51c13cf9c5ae 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -789,6 +789,7 @@ enum { TCA_ROOT_FLAGS, TCA_ROOT_COUNT, TCA_ROOT_TIME_DELTA, /* in msecs */ + TCA_ROOT_EXT_WARN_MSG, __TCA_ROOT_MAX, #define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) }; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fce522886099..296fc1afedd8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1589,6 +1589,10 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], t->tca__pad1 = 0; t->tca__pad2 = 0; + if (extack && extack->_msg && + nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg)) + goto out_nlmsg_trim; + nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; @@ -1598,10 +1602,6 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], nla_nest_end(skb, nest); - if (extack && extack->_msg && - nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) - goto out_nlmsg_trim; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; -- cgit From 769639c1fe8a98129aa97c8ee981639db1e8955c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 16 Mar 2023 15:02:34 -0700 Subject: net: xdp: don't call notifiers during driver init Drivers will commonly perform feature setting during init, if they use the xdp_set_features_flag() helper they'll likely run into an ASSERT_RTNL() inside call_netdevice_notifiers_info(). Don't call the notifier until the device is actually registered. Nothing should be tracking the device until its registered and after its unregistration has started. Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag") Link: https://lore.kernel.org/r/20230316220234.598091-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/xdp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/xdp.c b/net/core/xdp.c index 87e654b7d06c..b5737e47ec41 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -781,7 +781,9 @@ void xdp_set_features_flag(struct net_device *dev, xdp_features_t val) return; dev->xdp_features = val; - call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); + + if (dev->reg_state == NETREG_REGISTERED) + call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev); } EXPORT_SYMBOL_GPL(xdp_set_features_flag); -- cgit From dd172d0c2cea3c14ca9d007eeab51bec7676ece7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 16 Mar 2023 09:51:33 -0500 Subject: net: ipa: reg: include When "reg.h" got created, it included calls to WARN() and WARN_ON(). Those macros are defined via . In addition, it uses is_power_of_2(), which is defined in . Include those files so IPA "reg.h" has access to all definitions it requires. Meanwhile, is included but nothing defined therein is required directly in "reg.h", so get rid of that. Fixes: 81772e444dbe ("net: ipa: start generalizing "ipa_reg"") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/reg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/reg.h b/drivers/net/ipa/reg.h index 57b457f39b6e..2ee07eebca67 100644 --- a/drivers/net/ipa/reg.h +++ b/drivers/net/ipa/reg.h @@ -6,7 +6,8 @@ #define _REG_H_ #include -#include +#include +#include /** * struct reg - A register descriptor -- cgit From 55c49e5c94411a82a0ba06dc615ed12b6387dec5 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 16 Mar 2023 09:51:34 -0500 Subject: net: ipa: add two missing declarations When gsi_reg_init() got added, its declaration was added to "gsi_reg.h" without declaring the two struct pointer types it uses. Add these struct declarations to "gsi_reg.h". Fixes: 3c506add35c7 ("net: ipa: introduce gsi_reg_init()") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi_reg.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h index f62f0a5c653d..48fde65fa2e8 100644 --- a/drivers/net/ipa/gsi_reg.h +++ b/drivers/net/ipa/gsi_reg.h @@ -10,6 +10,10 @@ #include +struct platform_device; + +struct gsi; + /** * DOC: GSI Registers * -- cgit From 786bbe50e1d5777f0b6ec7b4c2de6189d6f7feb4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 16 Mar 2023 09:51:35 -0500 Subject: net: ipa: kill FILT_ROUT_CACHE_CFG IPA register A recent commit defined a few IPA registers used for IPA v5.0+. One of those was a mistake. Although the filter and router caches get *flushed* using a single register, they use distinct registers (ENDP_FILTER_CACHE_CFG and ENDP_ROUTER_CACHE_CFG) for configuration. And although there *exists* a FILT_ROUT_CACHE_CFG register, it is not needed in upstream code. So get rid of definitions related to FILT_ROUT_CACHE_CFG, because they are not needed. Fixes: 8ba59716d16a ("net: ipa: define IPA v5.0+ registers") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_reg.c | 4 ++-- drivers/net/ipa/ipa_reg.h | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c index 735fa6591609..463a31dfa9f4 100644 --- a/drivers/net/ipa/ipa_reg.c +++ b/drivers/net/ipa/ipa_reg.c @@ -39,7 +39,8 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id) return version <= IPA_VERSION_3_1; case ENDP_FILTER_ROUTER_HSH_CFG: - return version != IPA_VERSION_4_2; + return version < IPA_VERSION_5_0 && + version != IPA_VERSION_4_2; case IRQ_SUSPEND_EN: case IRQ_SUSPEND_CLR: @@ -52,7 +53,6 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id) case QSB_MAX_WRITES: case QSB_MAX_READS: case FILT_ROUT_HASH_EN: - case FILT_ROUT_CACHE_CFG: case FILT_ROUT_HASH_FLUSH: case FILT_ROUT_CACHE_FLUSH: case STATE_AGGR_ACTIVE: diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index 28aa1351dd48..ff2be8be0f68 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -61,7 +61,6 @@ enum ipa_reg_id { QSB_MAX_WRITES, QSB_MAX_READS, FILT_ROUT_HASH_EN, /* Not IPA v5.0+ */ - FILT_ROUT_CACHE_CFG, /* IPA v5.0+ */ FILT_ROUT_HASH_FLUSH, /* Not IPA v5.0+ */ FILT_ROUT_CACHE_FLUSH, /* IPA v5.0+ */ STATE_AGGR_ACTIVE, @@ -206,14 +205,6 @@ enum ipa_reg_qsb_max_reads_field_id { GEN_QMB_1_MAX_READS_BEATS, /* IPA v4.0+ */ }; -/* FILT_ROUT_CACHE_CFG register */ -enum ipa_reg_filt_rout_cache_cfg_field_id { - ROUTER_CACHE_EN, - FILTER_CACHE_EN, - LOW_PRI_HASH_HIT_DISABLE, - LRU_EVICTION_THRESHOLD, -}; - /* FILT_ROUT_HASH_EN and FILT_ROUT_HASH_FLUSH registers */ enum ipa_reg_filt_rout_hash_field_id { IPV6_ROUTER_HASH, -- cgit From 21e8aaca401ce2b45ece1d8fabd29d422de7b48e Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 16 Mar 2023 09:51:36 -0500 Subject: net: ipa: fix some register validity checks A recent commit defined HW_PARAM_4 as a GSI register ID but did not add it to gsi_reg_id_valid() to indicate it's valid (for IPA v5.0+). Add version checks for the HW_PARAM_2 and INTER_EE IRQ GSI registers there as well. IPA v5.0 supports up to 8 source and destination resource groups. Update the validity check (and the comments where the register IDs are defined) to reflect that. Similarly update comments and validity checks for the hash/cache-related registers. Note that this patch fixes an omission and constrains things further, but these don't technically represent bugs. Fixes: f651334e1ef5 ("net: ipa: add HW_PARAM_4 GSI register") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi_reg.c | 9 ++++++++- drivers/net/ipa/ipa_reg.c | 24 ++++++++++++++++-------- drivers/net/ipa/ipa_reg.h | 12 ++++++------ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/drivers/net/ipa/gsi_reg.c b/drivers/net/ipa/gsi_reg.c index 1412b67304c8..1651fbad4bd5 100644 --- a/drivers/net/ipa/gsi_reg.c +++ b/drivers/net/ipa/gsi_reg.c @@ -15,6 +15,14 @@ static bool gsi_reg_id_valid(struct gsi *gsi, enum gsi_reg_id reg_id) switch (reg_id) { case INTER_EE_SRC_CH_IRQ_MSK: case INTER_EE_SRC_EV_CH_IRQ_MSK: + return gsi->version >= IPA_VERSION_3_5; + + case HW_PARAM_2: + return gsi->version >= IPA_VERSION_3_5_1; + + case HW_PARAM_4: + return gsi->version >= IPA_VERSION_5_0; + case CH_C_CNTXT_0: case CH_C_CNTXT_1: case CH_C_CNTXT_2: @@ -43,7 +51,6 @@ static bool gsi_reg_id_valid(struct gsi *gsi, enum gsi_reg_id reg_id) case CH_CMD: case EV_CH_CMD: case GENERIC_CMD: - case HW_PARAM_2: case CNTXT_TYPE_IRQ: case CNTXT_TYPE_IRQ_MSK: case CNTXT_SRC_CH_IRQ: diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c index 463a31dfa9f4..3f475428dddd 100644 --- a/drivers/net/ipa/ipa_reg.c +++ b/drivers/net/ipa/ipa_reg.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. - * Copyright (C) 2019-2022 Linaro Ltd. + * Copyright (C) 2019-2023 Linaro Ltd. */ #include @@ -15,6 +15,17 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id) enum ipa_version version = ipa->version; switch (reg_id) { + case FILT_ROUT_HASH_EN: + return version == IPA_VERSION_4_2; + + case FILT_ROUT_HASH_FLUSH: + return version < IPA_VERSION_5_0 && version != IPA_VERSION_4_2; + + case FILT_ROUT_CACHE_FLUSH: + case ENDP_FILTER_CACHE_CFG: + case ENDP_ROUTER_CACHE_CFG: + return version >= IPA_VERSION_5_0; + case IPA_BCR: case COUNTER_CFG: return version < IPA_VERSION_4_5; @@ -32,11 +43,13 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id) case SRC_RSRC_GRP_45_RSRC_TYPE: case DST_RSRC_GRP_45_RSRC_TYPE: return version <= IPA_VERSION_3_1 || - version == IPA_VERSION_4_5; + version == IPA_VERSION_4_5 || + version == IPA_VERSION_5_0; case SRC_RSRC_GRP_67_RSRC_TYPE: case DST_RSRC_GRP_67_RSRC_TYPE: - return version <= IPA_VERSION_3_1; + return version <= IPA_VERSION_3_1 || + version == IPA_VERSION_5_0; case ENDP_FILTER_ROUTER_HSH_CFG: return version < IPA_VERSION_5_0 && @@ -52,9 +65,6 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id) case SHARED_MEM_SIZE: case QSB_MAX_WRITES: case QSB_MAX_READS: - case FILT_ROUT_HASH_EN: - case FILT_ROUT_HASH_FLUSH: - case FILT_ROUT_CACHE_FLUSH: case STATE_AGGR_ACTIVE: case LOCAL_PKT_PROC_CNTXT: case AGGR_FORCE_CLOSE: @@ -76,8 +86,6 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id) case ENDP_INIT_RSRC_GRP: case ENDP_INIT_SEQ: case ENDP_STATUS: - case ENDP_FILTER_CACHE_CFG: - case ENDP_ROUTER_CACHE_CFG: case IPA_IRQ_STTS: case IPA_IRQ_EN: case IPA_IRQ_CLR: diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index ff2be8be0f68..7dd65d39333d 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -60,8 +60,8 @@ enum ipa_reg_id { SHARED_MEM_SIZE, QSB_MAX_WRITES, QSB_MAX_READS, - FILT_ROUT_HASH_EN, /* Not IPA v5.0+ */ - FILT_ROUT_HASH_FLUSH, /* Not IPA v5.0+ */ + FILT_ROUT_HASH_EN, /* IPA v4.2 */ + FILT_ROUT_HASH_FLUSH, /* Not IPA v4.2 nor IPA v5.0+ */ FILT_ROUT_CACHE_FLUSH, /* IPA v5.0+ */ STATE_AGGR_ACTIVE, IPA_BCR, /* Not IPA v4.5+ */ @@ -76,12 +76,12 @@ enum ipa_reg_id { TIMERS_PULSE_GRAN_CFG, /* IPA v4.5+ */ SRC_RSRC_GRP_01_RSRC_TYPE, SRC_RSRC_GRP_23_RSRC_TYPE, - SRC_RSRC_GRP_45_RSRC_TYPE, /* Not IPA v3.5+, IPA v4.5 */ - SRC_RSRC_GRP_67_RSRC_TYPE, /* Not IPA v3.5+ */ + SRC_RSRC_GRP_45_RSRC_TYPE, /* Not IPA v3.5+; IPA v4.5, IPA v5.0 */ + SRC_RSRC_GRP_67_RSRC_TYPE, /* Not IPA v3.5+; IPA v5.0 */ DST_RSRC_GRP_01_RSRC_TYPE, DST_RSRC_GRP_23_RSRC_TYPE, - DST_RSRC_GRP_45_RSRC_TYPE, /* Not IPA v3.5+, IPA v4.5 */ - DST_RSRC_GRP_67_RSRC_TYPE, /* Not IPA v3.5+ */ + DST_RSRC_GRP_45_RSRC_TYPE, /* Not IPA v3.5+; IPA v4.5, IPA v5.0 */ + DST_RSRC_GRP_67_RSRC_TYPE, /* Not IPA v3.5+; IPA v5.0 */ ENDP_INIT_CTRL, /* Not IPA v4.2+ for TX, not IPA v4.0+ for RX */ ENDP_INIT_CFG, ENDP_INIT_NAT, /* TX only */ -- cgit From c00133a9e87ea5324d0b883d801eb6656f26739b Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 16 Mar 2023 08:26:47 +0100 Subject: drm/ttm: drop extra ttm_bo_put in ttm_bo_cleanup_refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was accidentially left over when we switched to the delayed delete worker. Suggested-by: Matthew Auld Signed-off-by: Christian König Fixes: 9bff18d13473 ("drm/ttm: use per BO cleanup workers") Reported-by: Steven Rostedt (Google) Tested-by: Steven Rostedt (Google) Reviewed-by: Matthew Auld Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20230316072647.406707-1-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 326a3d13a829..c286c6ffe07f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -295,8 +295,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (unlock_resv) dma_resv_unlock(bo->base.resv); - ttm_bo_put(bo); - return 0; } -- cgit From 90de546d9a0b3c771667af18bb3f80567eabb89b Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 15 Mar 2023 14:00:21 +0800 Subject: ethernet: sun: add check for the mdesc_grab() In vnet_port_probe() and vsw_port_probe(), we should check the return value of mdesc_grab() as it may return NULL which can caused NPD bugs. Fixes: 5d01fa0c6bd8 ("ldmvsw: Add ldmvsw.c driver code") Fixes: 43fdf27470b2 ("[SPARC64]: Abstract out mdesc accesses for better MD update handling.") Signed-off-by: Liang He Reviewed-by: Piotr Raczynski Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/ldmvsw.c | 3 +++ drivers/net/ethernet/sun/sunvnet.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 8addee6d04bd..734a817d3c94 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -287,6 +287,9 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) hp = mdesc_grab(); + if (!hp) + return -ENODEV; + rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len); err = -ENODEV; if (!rmac) { diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index fe86fbd58586..e220620d0ffc 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -433,6 +433,9 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) hp = mdesc_grab(); + if (!hp) + return -ENODEV; + vp = vnet_find_parent(hp, vdev->mp, vdev); if (IS_ERR(vp)) { pr_err("Cannot find port parent vnet\n"); -- cgit From e05bb97d9c9dd4ba5739a27921044c935a7fb3be Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 15 Mar 2023 16:04:23 +0900 Subject: net: renesas: rswitch: Fix the output value of quote from rswitch_rx() If the RX descriptor doesn't have any data, the output value of quote from rswitch_rx() will be increased unexpectedily. So, fix it. Reported-by: Volodymyr Babchuk Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 853394e5bb8b..46d8d9c8fc19 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -702,13 +702,14 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) u16 pkt_len; u32 get_ts; + if (*quota <= 0) + return true; + boguscnt = min_t(int, gq->ring_size, *quota); limit = boguscnt; desc = &gq->rx_ring[gq->cur]; while ((desc->desc.die_dt & DT_MASK) != DT_FEMPTY) { - if (--boguscnt < 0) - break; dma_rmb(); pkt_len = le16_to_cpu(desc->desc.info_ds) & RX_DS; skb = gq->skbs[gq->cur]; @@ -734,6 +735,9 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) gq->cur = rswitch_next_queue_index(gq, true, 1); desc = &gq->rx_ring[gq->cur]; + + if (--boguscnt <= 0) + break; } num = rswitch_get_num_cur_queues(gq); @@ -745,7 +749,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) goto err; gq->dirty = rswitch_next_queue_index(gq, false, num); - *quota -= limit - (++boguscnt); + *quota -= limit - boguscnt; return boguscnt <= 0; -- cgit From 2c59e993c86ad57afde26eaf1beb35694da5fbfe Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 15 Mar 2023 16:04:24 +0900 Subject: net: renesas: rswitch: Fix GWTSDIE register handling Since the GWCA has the TX timestamp feature, this driver should not disable it if one of ports is opened. So, fix it. Reported-by: Phong Hoang Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 9 +++++++-- drivers/net/ethernet/renesas/rswitch.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 46d8d9c8fc19..c4f93d24c6a4 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1441,7 +1441,10 @@ static int rswitch_open(struct net_device *ndev) rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, true); rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, true); - iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDIE); + if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) + iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDIE); + + bitmap_set(rdev->priv->opened_ports, rdev->port, 1); return 0; }; @@ -1452,8 +1455,10 @@ static int rswitch_stop(struct net_device *ndev) struct rswitch_gwca_ts_info *ts_info, *ts_info2; netif_tx_stop_all_queues(ndev); + bitmap_clear(rdev->priv->opened_ports, rdev->port, 1); - iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID); + if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) + iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID); list_for_each_entry_safe(ts_info, ts_info2, &rdev->priv->gwca.ts_info_list, list) { if (ts_info->port != rdev->port) diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 27d3d38c055f..b3e0411b408e 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -998,6 +998,7 @@ struct rswitch_private { struct rcar_gen4_ptp_private *ptp_priv; struct rswitch_device *rdev[RSWITCH_NUM_PORTS]; + DECLARE_BITMAP(opened_ports, RSWITCH_NUM_PORTS); struct rswitch_gwca gwca; struct rswitch_etha etha[RSWITCH_NUM_PORTS]; -- cgit From 9ec7eb60dcbcb6c41076defbc5df7bbd95ceaba5 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 15 Mar 2023 13:18:40 +0200 Subject: bonding: restore IFF_MASTER/SLAVE flags on bond enslave ether type change Add bond_ether_setup helper which is used to fix ether_setup() calls in the bonding driver. It takes care of both IFF_MASTER and IFF_SLAVE flags, the former is always restored and the latter only if it was set. If the bond enslaves non-ARPHRD_ETHER device (changes its type), then releases it and enslaves ARPHRD_ETHER device (changes back) then we use ether_setup() to restore the bond device type but it also resets its flags and removes IFF_MASTER and IFF_SLAVE[1]. Use the bond_ether_setup helper to restore both after such transition. [1] reproduce (nlmon is non-ARPHRD_ETHER): $ ip l add nlmon0 type nlmon $ ip l add bond2 type bond mode active-backup $ ip l set nlmon0 master bond2 $ ip l set nlmon0 nomaster $ ip l add bond1 type bond (we use bond1 as ARPHRD_ETHER device to restore bond2's mode) $ ip l set bond1 master bond2 $ ip l sh dev bond2 37: bond2: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether be:d7:c5:40:5b:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500 (notice bond2's IFF_MASTER is missing) Fixes: e36b9d16c6a6 ("bonding: clean muticast addresses when device changes type") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 00646aa315c3..4bd911f9d3f9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1775,6 +1775,19 @@ void bond_lower_state_changed(struct slave *slave) slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg); \ } while (0) +/* The bonding driver uses ether_setup() to convert a master bond device + * to ARPHRD_ETHER, that resets the target netdevice's flags so we always + * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE if it was set + */ +static void bond_ether_setup(struct net_device *bond_dev) +{ + unsigned int slave_flag = bond_dev->flags & IFF_SLAVE; + + ether_setup(bond_dev); + bond_dev->flags |= IFF_MASTER | slave_flag; + bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct netlink_ext_ack *extack) @@ -1866,10 +1879,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (slave_dev->type != ARPHRD_ETHER) bond_setup_by_slave(bond_dev, slave_dev); - else { - ether_setup(bond_dev); - bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; - } + else + bond_ether_setup(bond_dev); call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, bond_dev); -- cgit From e667d469098671261d558be0cd93dca4d285ce1e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 15 Mar 2023 13:18:41 +0200 Subject: bonding: restore bond's IFF_SLAVE flag if a non-eth dev enslave fails syzbot reported a warning[1] where the bond device itself is a slave and we try to enslave a non-ethernet device as the first slave which fails but then in the error path when ether_setup() restores the bond device it also clears all flags. In my previous fix[2] I restored the IFF_MASTER flag, but I didn't consider the case that the bond device itself might also be a slave with IFF_SLAVE set, so we need to restore that flag as well. Use the bond_ether_setup helper which does the right thing and restores the bond's flags properly. Steps to reproduce using a nlmon dev: $ ip l add nlmon0 type nlmon $ ip l add bond1 type bond $ ip l add bond2 type bond $ ip l set bond1 master bond2 $ ip l set dev nlmon0 master bond1 $ ip -d l sh dev bond1 22: bond1: mtu 1500 qdisc noqueue master bond2 state DOWN mode DEFAULT group default qlen 1000 (now bond1's IFF_SLAVE flag is gone and we'll hit a warning[3] if we try to delete it) [1] https://syzkaller.appspot.com/bug?id=391c7b1f6522182899efba27d891f1743e8eb3ef [2] commit 7d5cd2ce5292 ("bonding: correctly handle bonding type change on enslave failure") [3] example warning: [ 27.008664] bond1: (slave nlmon0): The slave device specified does not support setting the MAC address [ 27.008692] bond1: (slave nlmon0): Error -95 calling set_mac_address [ 32.464639] bond1 (unregistering): Released all slaves [ 32.464685] ------------[ cut here ]------------ [ 32.464686] WARNING: CPU: 1 PID: 2004 at net/core/dev.c:10829 unregister_netdevice_many+0x72a/0x780 [ 32.464694] Modules linked in: br_netfilter bridge bonding virtio_net [ 32.464699] CPU: 1 PID: 2004 Comm: ip Kdump: loaded Not tainted 5.18.0-rc3+ #47 [ 32.464703] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.1-2.fc37 04/01/2014 [ 32.464704] RIP: 0010:unregister_netdevice_many+0x72a/0x780 [ 32.464707] Code: 99 fd ff ff ba 90 1a 00 00 48 c7 c6 f4 02 66 96 48 c7 c7 20 4d 35 96 c6 05 fa c7 2b 02 01 e8 be 6f 4a 00 0f 0b e9 73 fd ff ff <0f> 0b e9 5f fd ff ff 80 3d e3 c7 2b 02 00 0f 85 3b fd ff ff ba 59 [ 32.464710] RSP: 0018:ffffa006422d7820 EFLAGS: 00010206 [ 32.464712] RAX: ffff8f6e077140a0 RBX: ffffa006422d7888 RCX: 0000000000000000 [ 32.464714] RDX: ffff8f6e12edbe58 RSI: 0000000000000296 RDI: ffffffff96d4a520 [ 32.464716] RBP: ffff8f6e07714000 R08: ffffffff96d63600 R09: ffffa006422d7728 [ 32.464717] R10: 0000000000000ec0 R11: ffffffff9698c988 R12: ffff8f6e12edb140 [ 32.464719] R13: dead000000000122 R14: dead000000000100 R15: ffff8f6e12edb140 [ 32.464723] FS: 00007f297c2f1740(0000) GS:ffff8f6e5d900000(0000) knlGS:0000000000000000 [ 32.464725] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 32.464726] CR2: 00007f297bf1c800 CR3: 00000000115e8000 CR4: 0000000000350ee0 [ 32.464730] Call Trace: [ 32.464763] [ 32.464767] rtnl_dellink+0x13e/0x380 [ 32.464776] ? cred_has_capability.isra.0+0x68/0x100 [ 32.464780] ? __rtnl_unlock+0x33/0x60 [ 32.464783] ? bpf_lsm_capset+0x10/0x10 [ 32.464786] ? security_capable+0x36/0x50 [ 32.464790] rtnetlink_rcv_msg+0x14e/0x3b0 [ 32.464792] ? _copy_to_iter+0xb1/0x790 [ 32.464796] ? post_alloc_hook+0xa0/0x160 [ 32.464799] ? rtnl_calcit.isra.0+0x110/0x110 [ 32.464802] netlink_rcv_skb+0x50/0xf0 [ 32.464806] netlink_unicast+0x216/0x340 [ 32.464809] netlink_sendmsg+0x23f/0x480 [ 32.464812] sock_sendmsg+0x5e/0x60 [ 32.464815] ____sys_sendmsg+0x22c/0x270 [ 32.464818] ? import_iovec+0x17/0x20 [ 32.464821] ? sendmsg_copy_msghdr+0x59/0x90 [ 32.464823] ? do_set_pte+0xa0/0xe0 [ 32.464828] ___sys_sendmsg+0x81/0xc0 [ 32.464832] ? mod_objcg_state+0xc6/0x300 [ 32.464835] ? refill_obj_stock+0xa9/0x160 [ 32.464838] ? memcg_slab_free_hook+0x1a5/0x1f0 [ 32.464842] __sys_sendmsg+0x49/0x80 [ 32.464847] do_syscall_64+0x3b/0x90 [ 32.464851] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 32.464865] RIP: 0033:0x7f297bf2e5e7 [ 32.464868] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 32.464869] RSP: 002b:00007ffd96c824c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 32.464872] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f297bf2e5e7 [ 32.464874] RDX: 0000000000000000 RSI: 00007ffd96c82540 RDI: 0000000000000003 [ 32.464875] RBP: 00000000640f19de R08: 0000000000000001 R09: 000000000000007c [ 32.464876] R10: 00007f297bffabe0 R11: 0000000000000246 R12: 0000000000000001 [ 32.464877] R13: 00007ffd96c82d20 R14: 00007ffd96c82610 R15: 000055bfe38a7020 [ 32.464881] [ 32.464882] ---[ end trace 0000000000000000 ]--- Fixes: 7d5cd2ce5292 ("bonding: correctly handle bonding type change on enslave failure") Reported-by: syzbot+9dfc3f3348729cc82277@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=391c7b1f6522182899efba27d891f1743e8eb3ef Signed-off-by: Nikolay Aleksandrov Reviewed-by: Michal Kubiak Acked-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4bd911f9d3f9..236e5219c811 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2300,9 +2300,7 @@ err_undo_flags: eth_hw_addr_random(bond_dev); if (bond_dev->type != ARPHRD_ETHER) { dev_close(bond_dev); - ether_setup(bond_dev); - bond_dev->flags |= IFF_MASTER; - bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; + bond_ether_setup(bond_dev); } } -- cgit From 222c94ec0ad48b951f0f692a7cf5bcf7a6bcb6b1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 15 Mar 2023 13:18:42 +0200 Subject: selftests: bonding: add tests for ether type changes Add new network selftests for the bonding device which exercise the ether type changing call paths. They also test for the recent syzbot bug[1] which causes a warning and results in wrong device flags (IFF_SLAVE missing). The test adds three bond devices and a nlmon device, enslaves one of the bond devices to the other and then uses the nlmon device for successful and unsuccesful enslaves both of which change the bond ether type. Thus we can test for both MASTER and SLAVE flags at the same time. If the flags are properly restored we get: TEST: Change ether type of an enslaved bond device with unsuccessful enslave [ OK ] TEST: Change ether type of an enslaved bond device with successful enslave [ OK ] [1] https://syzkaller.appspot.com/bug?id=391c7b1f6522182899efba27d891f1743e8eb3ef Signed-off-by: Nikolay Aleksandrov Reviewed-by: Michal Kubiak Acked-by: Jonathan Toppins Acked-by: Jay Vosburgh Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond-eth-type-change.sh | 85 ++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 8e3b786a748f..a39bb2560d9b 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -8,7 +8,8 @@ TEST_PROGS := \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ - option_prio.sh + option_prio.sh \ + bond-eth-type-change.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh new file mode 100755 index 000000000000..5cdd22048ba7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bond device ether type changing +# + +ALL_TESTS=" + bond_test_unsuccessful_enslave_type_change + bond_test_successful_enslave_type_change +" +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/net_forwarding_lib.sh + +bond_check_flags() +{ + local bonddev=$1 + + ip -d l sh dev "$bonddev" | grep -q "MASTER" + check_err $? "MASTER flag is missing from the bond device" + + ip -d l sh dev "$bonddev" | grep -q "SLAVE" + check_err $? "SLAVE flag is missing from the bond device" +} + +# test enslaved bond dev type change from ARPHRD_ETHER and back +# this allows us to test both MASTER and SLAVE flags at once +bond_test_enslave_type_change() +{ + local test_success=$1 + local devbond0="test-bond0" + local devbond1="test-bond1" + local devbond2="test-bond2" + local nonethdev="test-noneth0" + + # create a non-ARPHRD_ETHER device for testing (e.g. nlmon type) + ip link add name "$nonethdev" type nlmon + check_err $? "could not create a non-ARPHRD_ETHER device (nlmon)" + ip link add name "$devbond0" type bond + if [ $test_success -eq 1 ]; then + # we need devbond0 in active-backup mode to successfully enslave nonethdev + ip link set dev "$devbond0" type bond mode active-backup + check_err $? "could not change bond mode to active-backup" + fi + ip link add name "$devbond1" type bond + ip link add name "$devbond2" type bond + ip link set dev "$devbond0" master "$devbond1" + check_err $? "could not enslave $devbond0 to $devbond1" + # change bond type to non-ARPHRD_ETHER + ip link set dev "$nonethdev" master "$devbond0" 1>/dev/null 2>/dev/null + ip link set dev "$nonethdev" nomaster 1>/dev/null 2>/dev/null + # restore ARPHRD_ETHER type by enslaving such device + ip link set dev "$devbond2" master "$devbond0" + check_err $? "could not enslave $devbond2 to $devbond0" + ip link set dev "$devbond1" nomaster + + bond_check_flags "$devbond0" + + # clean up + ip link del dev "$devbond0" + ip link del dev "$devbond1" + ip link del dev "$devbond2" + ip link del dev "$nonethdev" +} + +bond_test_unsuccessful_enslave_type_change() +{ + RET=0 + + bond_test_enslave_type_change 0 + log_test "Change ether type of an enslaved bond device with unsuccessful enslave" +} + +bond_test_successful_enslave_type_change() +{ + RET=0 + + bond_test_enslave_type_change 1 + log_test "Change ether type of an enslaved bond device with successful enslave" +} + +tests_run + +exit "$EXIT_STATUS" -- cgit From 884a1f956179b561e7e7f1d29fc331cf53233c89 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Oct 2022 12:42:29 +0200 Subject: tools/power turbostat: update dump of SECONDARY_TURBO_RATIO_LIMIT cosmetic only (but useful if you copy/paste) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index aba460410dbd..7ae3086c5ec0 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2538,7 +2538,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model) get_msr(base_cpu, trl_msr_offset, &msr); fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", - base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY" : "", msr); + base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr); if (has_turbo_ratio_group_limits(family, model)) { get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); -- cgit From 9c08581728ccadc6b9e4135b7b8d31948e814f6f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 18 Oct 2022 15:23:37 -0400 Subject: tools/power turbostat: Provide better debug messages for failed capabilities accesses turbostat reports some capabilities access errors and not others. Provide the same debug message for all errors. [lenb: remove extra quotes] Cc: David Arcari Signed-off-by: Prarit Bhargava Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- tools/power/x86/turbostat/turbostat.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index c7b26a3603af..7dcb4f4f3d55 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -340,7 +340,7 @@ starts a new interval. must be run as root. Alternatively, non-root users can be enabled to run turbostat this way: -# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat +# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep path/to/turbostat # chmod +r /dev/cpu/*/msr diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7ae3086c5ec0..d246ef6153dc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -670,7 +670,8 @@ static int perf_instr_count_open(int cpu_num) /* counter for cpu_num, including user + kernel and all processes */ fd = perf_event_open(&pea, -1, cpu_num, -1, 0); if (fd == -1) { - warn("cpu%d: perf instruction counter", cpu_num); + warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", + progname); BIC_NOT_PRESENT(BIC_IPC); } @@ -3502,9 +3503,6 @@ release_msr: /* * set_my_sched_priority(pri) * return previous - * - * if non-root, do this: - * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat */ int set_my_sched_priority(int priority) { @@ -3518,7 +3516,8 @@ int set_my_sched_priority(int priority) retval = setpriority(PRIO_PROCESS, 0, priority); if (retval) - err(retval, "setpriority(%d)", priority); + errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", + progname); errno = 0; retval = getpriority(PRIO_PROCESS, 0); @@ -5476,7 +5475,8 @@ void print_dev_latency(void) fd = open(path, O_RDONLY); if (fd < 0) { - warn("fopen %s\n", path); + warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", + progname); return; } -- cgit From 40aafc7d58d3544f152a863a0e9863014b6d5d8c Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 15 Dec 2022 10:18:16 -0500 Subject: tools/power turbostat: Fix /dev/cpu_dma_latency warnings When running as non-root the following error is seen in turbostat: turbostat: fopen /dev/cpu_dma_latency : Permission denied turbostat and the man page have information on how to avoid other permission errors, so these can be fixed the same way. Provide better /dev/cpu_dma_latency warnings that provide instructions on how to avoid the error, and update the man page. Signed-off-by: Prarit Bhargava Cc: linux-pm@vger.kernel.org Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 ++ tools/power/x86/turbostat/turbostat.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 7dcb4f4f3d55..8f08c3fd498d 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -344,6 +344,8 @@ Alternatively, non-root users can be enabled to run turbostat this way: # chmod +r /dev/cpu/*/msr +# chmod +r /dev/cpu_dma_latency + .B "turbostat " reads hardware counters, but doesn't write them. So it will not interfere with the OS or other programs, including diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d246ef6153dc..9aed2620a2d2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5482,7 +5482,7 @@ void print_dev_latency(void) retval = read(fd, (void *)&value, sizeof(int)); if (retval != sizeof(int)) { - warn("read %s\n", path); + warn("read failed %s\n", path); close(fd); return; } -- cgit From 6cbfedc7afc93a7f06d4a2f7c4d4732b20adfc2b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Mar 2023 11:25:56 -0400 Subject: tools/power turbostat: remove stray newlines from warn/warnx strings warn(3) terminates strings with newlines Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9aed2620a2d2..649b48e53a31 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5482,7 +5482,7 @@ void print_dev_latency(void) retval = read(fd, (void *)&value, sizeof(int)); if (retval != sizeof(int)) { - warn("read failed %s\n", path); + warn("read failed %s", path); close(fd); return; } @@ -5543,7 +5543,7 @@ void process_cpuid() edx_flags = edx; if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) - warnx("get_msr(UCODE)\n"); + warnx("get_msr(UCODE)"); /* * check max extended function levels of CPUID. -- cgit From 93cac4150727dae0ee89f501dd75413b88eedec0 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:23:53 +0800 Subject: tools/power turbostat: Introduce support for EMR Introduce support for EMR. Signed-off-by: Zhang Rui Reviewed-by: Artem Bityutskiy Tested-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 649b48e53a31..eba8a20a4b00 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5462,6 +5462,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_D: return INTEL_FAM6_ICELAKE_X; + + case INTEL_FAM6_EMERALDRAPIDS_X: + return INTEL_FAM6_SAPPHIRERAPIDS_X; } return model; } -- cgit From 92c25393586ac799b9b7d9e50434f3c44a7622c4 Mon Sep 17 00:00:00 2001 From: Antti Laakso Date: Wed, 25 Jan 2023 15:17:50 +0200 Subject: tools/power turbostat: fix decoding of HWP_STATUS The "excursion to minimum" information is in bit2 in HWP_STATUS MSR. Fix the bitmask used for decoding the register. Signed-off-by: Antti Laakso Reviewed-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index eba8a20a4b00..7424c35fe209 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4425,7 +4425,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", - cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-"); + cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); return 0; } -- cgit From de7839ee02c651335db6bf0af24cf5cac3cc4c72 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Mar 2023 11:34:10 -0400 Subject: tools/power turbostat: version 2023.03.17 Happy St. Patrick's Day! Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7424c35fe209..8a36ba5df9f9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3,7 +3,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel and AMD processors. * - * Copyright (c) 2022 Intel Corporation. + * Copyright (c) 2023 Intel Corporation. * Len Brown */ @@ -670,8 +670,7 @@ static int perf_instr_count_open(int cpu_num) /* counter for cpu_num, including user + kernel and all processes */ fd = perf_event_open(&pea, -1, cpu_num, -1, 0); if (fd == -1) { - warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", - progname); + warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname); BIC_NOT_PRESENT(BIC_IPC); } @@ -3516,8 +3515,7 @@ int set_my_sched_priority(int priority) retval = setpriority(PRIO_PROCESS, 0, priority); if (retval) - errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", - progname); + errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname); errno = 0; retval = getpriority(PRIO_PROCESS, 0); @@ -5478,8 +5476,7 @@ void print_dev_latency(void) fd = open(path, O_RDONLY); if (fd < 0) { - warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", - progname); + warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname); return; } @@ -6228,7 +6225,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2022.10.04 - Len Brown \n"); + fprintf(outf, "turbostat version 2023.03.17 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 -- cgit From 2f0e4f0342201fe2228fcc2301cc2b42ae04b8e3 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 16 Mar 2023 10:45:12 +0000 Subject: cifs: check only tcon status on tcon related functions We had a couple of checks for session in cifs_tree_connect and cifs_mark_open_files_invalid, which were unnecessary. And that was done with ses_lock. Changed that to tc_lock too. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/connect.c | 10 +++++++--- fs/cifs/dfs.c | 10 +++++++--- fs/cifs/dfs_cache.c | 2 +- fs/cifs/file.c | 8 ++++---- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0eceddde7140..49b37594e991 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4036,9 +4036,13 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); - if (tcon->ses->ses_status != SES_GOOD || - (tcon->status != TID_NEW && - tcon->status != TID_NEED_TCON)) { + if (tcon->status != TID_NEW && + tcon->status != TID_NEED_TCON) { + spin_unlock(&tcon->tc_lock); + return -EHOSTDOWN; + } + + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; } diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c index c8bda52fa096..3a11716b6e13 100644 --- a/fs/cifs/dfs.c +++ b/fs/cifs/dfs.c @@ -502,9 +502,13 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); - if (tcon->ses->ses_status != SES_GOOD || - (tcon->status != TID_NEW && - tcon->status != TID_NEED_TCON)) { + if (tcon->status != TID_NEW && + tcon->status != TID_NEED_TCON) { + spin_unlock(&tcon->tc_lock); + return -EHOSTDOWN; + } + + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; } diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 1c59811bfa73..30cbdf8514a5 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1191,7 +1191,7 @@ static int __refresh_tcon(const char *path, struct cifs_tcon *tcon, bool force_r } spin_lock(&ipc->tc_lock); - if (ses->ses_status != SES_GOOD || ipc->status != TID_GOOD) { + if (ipc->status != TID_GOOD) { spin_unlock(&ipc->tc_lock); cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", __func__); goto out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4d4a2d82636d..6831a9949c43 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -174,13 +174,13 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) struct list_head *tmp1; /* only send once per connect */ - spin_lock(&tcon->ses->ses_lock); - if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) { - spin_unlock(&tcon->ses->ses_lock); + spin_lock(&tcon->tc_lock); + if (tcon->status != TID_NEED_RECON) { + spin_unlock(&tcon->tc_lock); return; } tcon->status = TID_IN_FILES_INVALIDATE; - spin_unlock(&tcon->ses->ses_lock); + spin_unlock(&tcon->tc_lock); /* list all files open on tree connection and mark them invalid */ spin_lock(&tcon->open_file_lock); -- cgit From 27c934dd8832dd40fd34776f916dc201e18b319b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 Mar 2023 13:13:08 -0400 Subject: nfsd: don't replace page in rq_pages if it's a continuation of last page The splice read calls nfsd_splice_actor to put the pages containing file data into the svc_rqst->rq_pages array. It's possible however to get a splice result that only has a partial page at the end, if (e.g.) the filesystem hands back a short read that doesn't cover the whole page. nfsd_splice_actor will plop the partial page into its rq_pages array and return. Then later, when nfsd_splice_actor is called again, the remainder of the page may end up being filled out. At this point, nfsd_splice_actor will put the page into the array _again_ corrupting the reply. If this is done enough times, rq_next_page will overrun the array and corrupt the trailing fields -- the rq_respages and rq_next_page pointers themselves. If we've already added the page to the array in the last pass, don't add it to the array a second time when dealing with a splice continuation. This was originally handled properly in nfsd_splice_actor, but commit 91e23b1c3982 ("NFSD: Clean up nfsd_splice_actor()") removed the check for it. Fixes: 91e23b1c3982 ("NFSD: Clean up nfsd_splice_actor()") Cc: Al Viro Reported-by: Dario Lesca Tested-by: David Critch Link: https://bugzilla.redhat.com/show_bug.cgi?id=2150630 Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ba34a31a7c70..cd0dbea335d9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -941,8 +941,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct page *last_page; last_page = page + (offset + sd->len - 1) / PAGE_SIZE; - for (page += offset / PAGE_SIZE; page <= last_page; page++) + for (page += offset / PAGE_SIZE; page <= last_page; page++) { + /* + * Skip page replacement when extending the contents + * of the current page. + */ + if (page == *(rqstp->rq_next_page - 1)) + continue; svc_rqst_replace_page(rqstp, page); + } if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; -- cgit From 70e42feab2e20618ddd0cbfc4ab4b08628236ecd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 17 Mar 2023 21:53:52 -0400 Subject: ext4: fix possible double unlock when moving a directory Fixes: 0813299c586b ("ext4: Fix possible corruption when moving a directory") Link: https://lore.kernel.org/r/5efbe1b9-ad8b-4a4f-b422-24824d2b775c@kili.mountain Reported-by: Dan Carpenter Reported-by: syzbot+0c73d1d8b952c5f3d714@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 31e21de56432..a5010b5b8a8c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3884,10 +3884,8 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, goto end_rename; } retval = ext4_rename_dir_prepare(handle, &old); - if (retval) { - inode_unlock(old.inode); + if (retval) goto end_rename; - } } /* * If we're renaming a file within an inline_data dir and adding or -- cgit From 30796d0dcb6e41c6558a07950f2ce60c209da867 Mon Sep 17 00:00:00 2001 From: Álvaro Fernández Rojas Date: Thu, 16 Mar 2023 18:28:07 +0100 Subject: net: dsa: b53: mmap: fix device tree support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPU port should also be enabled in order to get a working switch. Fixes: a5538a777b73 ("net: dsa: b53: mmap: Add device tree support") Signed-off-by: Álvaro Fernández Rojas Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20230316172807.460146-1-noltari@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index e968322dfbf0..70887e0aece3 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -263,7 +263,7 @@ static int b53_mmap_probe_of(struct platform_device *pdev, if (of_property_read_u32(of_port, "reg", ®)) continue; - if (reg < B53_CPU_PORT) + if (reg < B53_N_PORTS) pdata->enabled_ports |= BIT(reg); } -- cgit From ff821092cf02a70c2bccd2d19269f01e29aa52cf Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Thu, 16 Mar 2023 11:19:54 +0100 Subject: net: usb: smsc95xx: Limit packet length to skb->len Packet length retrieved from descriptor may be larger than the actual socket buffer length. In such case the cloned skb passed up the network stack will leak kernel memory contents. Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver") Signed-off-by: Szymon Heidrich Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230316101954.75836-1-szymon.heidrich@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/smsc95xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 32d2c60d334d..563ecd27b93e 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1833,6 +1833,12 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) size = (u16)((header & RX_STS_FL_) >> 16); align_count = (4 - ((size + NET_IP_ALIGN) % 4)) % 4; + if (unlikely(size > skb->len)) { + netif_dbg(dev, rx_err, dev->net, + "size err header=0x%08x\n", header); + return 0; + } + if (unlikely(header & RX_STS_ES_)) { netif_dbg(dev, rx_err, dev->net, "Error header=0x%08x\n", header); -- cgit From 3dacc5bb81472905a7f4f9879cb95477c22dc359 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 16 Mar 2023 15:22:32 +0530 Subject: net: ethernet: ti: am65-cpts: reset pps genf adj settings on enable The CPTS PPS GENf adjustment settings are invalid after it has been disabled for a while, so reset them. Fixes: eb9233ce6751 ("net: ethernet: ti: am65-cpts: adjust pps following ptp changes") Signed-off-by: Grygorii Strashko Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Reviewed-by: Michal Swiatkowski Link: https://lore.kernel.org/r/20230316095232.2002680-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpts.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 16ee9c29cb35..8caf85acbb6a 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -636,6 +636,10 @@ static void am65_cpts_perout_enable_hw(struct am65_cpts *cpts, val = lower_32_bits(cycles); am65_cpts_write32(cpts, val, genf[req->index].length); + am65_cpts_write32(cpts, 0, genf[req->index].control); + am65_cpts_write32(cpts, 0, genf[req->index].ppm_hi); + am65_cpts_write32(cpts, 0, genf[req->index].ppm_low); + cpts->genf_enable |= BIT(req->index); } else { am65_cpts_write32(cpts, 0, genf[req->index].length); -- cgit From 34343eb06afc04af9178a9883d9354dc12beede0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2023 19:23:09 +0100 Subject: efi/libstub: smbios: Use length member instead of record struct size The type 1 SMBIOS record happens to always be the same size, but there are other record types which have been augmented over time, and so we should really use the length field in the header to decide where the string table starts. Fixes: 550b33cfd4452968 ("arm64: efi: Force the use of ...") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/smbios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/smbios.c b/drivers/firmware/efi/libstub/smbios.c index 460418b7f5f5..aadb422b9637 100644 --- a/drivers/firmware/efi/libstub/smbios.c +++ b/drivers/firmware/efi/libstub/smbios.c @@ -36,7 +36,7 @@ const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize) if (status != EFI_SUCCESS) return NULL; - strtable = (u8 *)record + recsize; + strtable = (u8 *)record + record->length; for (int i = 1; i < ((u8 *)record)[offset]; i++) { int len = strlen(strtable); -- cgit From eb684408f3ea4856639675d6465f0024e498e4b1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2023 17:00:49 +0100 Subject: arm64: efi: Use SMBIOS processor version to key off Ampere quirk Instead of using the SMBIOS type 1 record 'family' field, which is often modified by OEMs, use the type 4 'processor ID' and 'processor version' fields, which are set to a small set of probe-able values on all known Ampere EFI systems in the field. Fixes: 550b33cfd4452968 ("arm64: efi: Force the use of ...") Tested-by: Andrea Righi Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64.c | 39 +++++++++++++++++++++++++------- drivers/firmware/efi/libstub/efistub.h | 41 +++++++++++++++++++++++++++++++--- drivers/firmware/efi/libstub/smbios.c | 13 +++++++++-- 3 files changed, 80 insertions(+), 13 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64.c b/drivers/firmware/efi/libstub/arm64.c index 399770266372..8aad8c49d43f 100644 --- a/drivers/firmware/efi/libstub/arm64.c +++ b/drivers/firmware/efi/libstub/arm64.c @@ -16,20 +16,43 @@ static bool system_needs_vamap(void) { - const u8 *type1_family = efi_get_smbios_string(1, family); + const struct efi_smbios_type4_record *record; + const u32 __aligned(1) *socid; + const u8 *version; /* * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if - * SetVirtualAddressMap() has not been called prior. + * SetVirtualAddressMap() has not been called prior. Most Altra systems + * can be identified by the SMCCC soc ID, which is conveniently exposed + * via the type 4 SMBIOS records. Otherwise, test the processor version + * field. eMAG systems all appear to have the processor version field + * set to "eMAG". */ - if (!type1_family || ( - strcmp(type1_family, "eMAG") && - strcmp(type1_family, "Altra") && - strcmp(type1_family, "Altra Max"))) + record = (struct efi_smbios_type4_record *)efi_get_smbios_record(4); + if (!record) return false; - efi_warn("Working around broken SetVirtualAddressMap()\n"); - return true; + socid = (u32 *)record->processor_id; + switch (*socid & 0xffff000f) { + static char const altra[] = "Ampere(TM) Altra(TM) Processor"; + static char const emag[] = "eMAG"; + + default: + version = efi_get_smbios_string(&record->header, 4, + processor_version); + if (!version || (strncmp(version, altra, sizeof(altra) - 1) && + strncmp(version, emag, sizeof(emag) - 1))) + break; + + fallthrough; + + case 0x0a160001: // Altra + case 0x0a160002: // Altra Max + efi_warn("Working around broken SetVirtualAddressMap()\n"); + return true; + } + + return false; } efi_status_t check_platform_features(void) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6bd3bb86d967..330565b9263a 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1074,6 +1074,8 @@ struct efi_smbios_record { u16 handle; }; +const struct efi_smbios_record *efi_get_smbios_record(u8 type); + struct efi_smbios_type1_record { struct efi_smbios_record header; @@ -1087,14 +1089,47 @@ struct efi_smbios_type1_record { u8 family; }; -#define efi_get_smbios_string(__type, __name) ({ \ +struct efi_smbios_type4_record { + struct efi_smbios_record header; + + u8 socket; + u8 processor_type; + u8 processor_family; + u8 processor_manufacturer; + u8 processor_id[8]; + u8 processor_version; + u8 voltage; + u16 external_clock; + u16 max_speed; + u16 current_speed; + u8 status; + u8 processor_upgrade; + u16 l1_cache_handle; + u16 l2_cache_handle; + u16 l3_cache_handle; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 core_count; + u8 enabled_core_count; + u8 thread_count; + u16 processor_characteristics; + u16 processor_family2; + u16 core_count2; + u16 enabled_core_count2; + u16 thread_count2; + u16 thread_enabled; +}; + +#define efi_get_smbios_string(__record, __type, __name) ({ \ int size = sizeof(struct efi_smbios_type ## __type ## _record); \ int off = offsetof(struct efi_smbios_type ## __type ## _record, \ __name); \ - __efi_get_smbios_string(__type, off, size); \ + __efi_get_smbios_string((__record), __type, off, size); \ }) -const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize); +const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, + u8 type, int offset, int recsize); void efi_remap_image(unsigned long image_base, unsigned alloc_size, unsigned long code_size); diff --git a/drivers/firmware/efi/libstub/smbios.c b/drivers/firmware/efi/libstub/smbios.c index aadb422b9637..f9c159c28f46 100644 --- a/drivers/firmware/efi/libstub/smbios.c +++ b/drivers/firmware/efi/libstub/smbios.c @@ -22,19 +22,28 @@ struct efi_smbios_protocol { u8 minor_version; }; -const u8 *__efi_get_smbios_string(u8 type, int offset, int recsize) +const struct efi_smbios_record *efi_get_smbios_record(u8 type) { struct efi_smbios_record *record; efi_smbios_protocol_t *smbios; efi_status_t status; u16 handle = 0xfffe; - const u8 *strtable; status = efi_bs_call(locate_protocol, &EFI_SMBIOS_PROTOCOL_GUID, NULL, (void **)&smbios) ?: efi_call_proto(smbios, get_next, &handle, &type, &record, NULL); if (status != EFI_SUCCESS) return NULL; + return record; +} + +const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, + u8 type, int offset, int recsize) +{ + const u8 *strtable; + + if (!record) + return NULL; strtable = (u8 *)record + record->length; for (int i = 1; i < ((u8 *)record)[offset]; i++) { -- cgit From f59a7ec1e69fc23946175b8c0d7e0fd21f94f8c9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2023 19:33:14 +0100 Subject: efi/libstub: smbios: Drop unused 'recsize' parameter We no longer use the recsize argument for locating the string table in an SMBIOS record, so we can drop it from the internal API. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efistub.h | 5 ++--- drivers/firmware/efi/libstub/smbios.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 330565b9263a..bd9c38a93bbc 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1122,14 +1122,13 @@ struct efi_smbios_type4_record { }; #define efi_get_smbios_string(__record, __type, __name) ({ \ - int size = sizeof(struct efi_smbios_type ## __type ## _record); \ int off = offsetof(struct efi_smbios_type ## __type ## _record, \ __name); \ - __efi_get_smbios_string((__record), __type, off, size); \ + __efi_get_smbios_string((__record), __type, off); \ }) const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, - u8 type, int offset, int recsize); + u8 type, int offset); void efi_remap_image(unsigned long image_base, unsigned alloc_size, unsigned long code_size); diff --git a/drivers/firmware/efi/libstub/smbios.c b/drivers/firmware/efi/libstub/smbios.c index f9c159c28f46..c217de2cc8d5 100644 --- a/drivers/firmware/efi/libstub/smbios.c +++ b/drivers/firmware/efi/libstub/smbios.c @@ -38,7 +38,7 @@ const struct efi_smbios_record *efi_get_smbios_record(u8 type) } const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, - u8 type, int offset, int recsize) + u8 type, int offset) { const u8 *strtable; -- cgit From 3615c78673c332b69aaacefbcde5937c5c706686 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Mar 2023 13:31:02 +0100 Subject: efi: sysfb_efi: Fix DMI quirks not working for simpledrm Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") moved the sysfb_apply_efi_quirks() call in sysfb_init() from before the [sysfb_]parse_mode() call to after it. But sysfb_apply_efi_quirks() modifies the global screen_info struct which [sysfb_]parse_mode() parses, so doing it later is too late. This has broken all DMI based quirks for correcting wrong firmware efifb settings when simpledrm is used. To fix this move the sysfb_apply_efi_quirks() call back to its old place and split the new setup of the efifb_fwnode (which requires the platform_device) into its own function and call that at the place of the moved sysfb_apply_efi_quirks(pd) calls. Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Cc: stable@vger.kernel.org Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/sysfb_efi.c | 5 ++++- drivers/firmware/sysfb.c | 4 +++- drivers/firmware/sysfb_simplefb.c | 2 +- include/linux/sysfb.h | 9 +++++++-- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index f06fdacc9bc8..e76d6803bdd0 100644 --- a/drivers/firmware/efi/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -341,7 +341,7 @@ static const struct fwnode_operations efifb_fwnode_ops = { #ifdef CONFIG_EFI static struct fwnode_handle efifb_fwnode; -__init void sysfb_apply_efi_quirks(struct platform_device *pd) +__init void sysfb_apply_efi_quirks(void) { if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) @@ -355,7 +355,10 @@ __init void sysfb_apply_efi_quirks(struct platform_device *pd) screen_info.lfb_height = temp; screen_info.lfb_linelength = 4 * screen_info.lfb_width; } +} +__init void sysfb_set_efifb_fwnode(struct platform_device *pd) +{ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI && IS_ENABLED(CONFIG_PCI)) { fwnode_init(&efifb_fwnode, &efifb_fwnode_ops); pd->dev.fwnode = &efifb_fwnode; diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 3fd3563d962b..3c197db42c9d 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -81,6 +81,8 @@ static __init int sysfb_init(void) if (disabled) goto unlock_mutex; + sysfb_apply_efi_quirks(); + /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { @@ -107,7 +109,7 @@ static __init int sysfb_init(void) goto unlock_mutex; } - sysfb_apply_efi_quirks(pd); + sysfb_set_efifb_fwnode(pd); ret = platform_device_add_data(pd, si, sizeof(*si)); if (ret) diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index ce9c007ed66f..82c64cb9f531 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -141,7 +141,7 @@ __init struct platform_device *sysfb_create_simplefb(const struct screen_info *s if (!pd) return ERR_PTR(-ENOMEM); - sysfb_apply_efi_quirks(pd); + sysfb_set_efifb_fwnode(pd); ret = platform_device_add_resources(pd, &res, 1); if (ret) diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 8ba8b5be5567..c1ef5fc60a3c 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -70,11 +70,16 @@ static inline void sysfb_disable(void) #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(struct platform_device *pd); +void sysfb_apply_efi_quirks(void); +void sysfb_set_efifb_fwnode(struct platform_device *pd); #else /* CONFIG_EFI */ -static inline void sysfb_apply_efi_quirks(struct platform_device *pd) +static inline void sysfb_apply_efi_quirks(void) +{ +} + +static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) { } -- cgit From 5ed213dd64681f84a01ceaa82fb336cf7d59ddcf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Mar 2023 13:31:03 +0100 Subject: efi: sysfb_efi: Add quirk for Lenovo Yoga Book X91F/L Another Lenovo convertable which reports a landscape resolution of 1920x1200 with a pitch of (1920 * 4) bytes, while the actual framebuffer has a resolution of 1200x1920 with a pitch of (1200 * 4) bytes. Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/sysfb_efi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index e76d6803bdd0..456d0e5eaf78 100644 --- a/drivers/firmware/efi/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -272,6 +272,14 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = { "IdeaPad Duet 3 10IGL5"), }, }, + { + /* Lenovo Yoga Book X91F / X91L */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + /* Non exact match to match F + L versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"), + }, + }, {}, }; -- cgit From 4985e7b2c002eb4c5c794a1d3acd91b82c89a0fd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 18 Mar 2023 22:12:31 +0800 Subject: block: ublk_drv: mark device as LIVE before adding disk IO can be started before add_disk() returns, such as reading parititon table, then the monitor work should work for making forward progress. So mark device as LIVE before adding disk, meantime change to DEAD if add_disk() fails. Fixed: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Reviewed-by: Ziyang Zhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230318141231.55562-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d1d1c8d606c8..fb5a557afde8 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1602,17 +1602,18 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); get_device(&ub->cdev_dev); + ub->dev_info.state = UBLK_S_DEV_LIVE; ret = add_disk(disk); if (ret) { /* * Has to drop the reference since ->free_disk won't be * called in case of add_disk failure. */ + ub->dev_info.state = UBLK_S_DEV_DEAD; ublk_put_device(ub); goto out_put_disk; } set_bit(UB_STATE_USED, &ub->state); - ub->dev_info.state = UBLK_S_DEV_LIVE; out_put_disk: if (ret) put_disk(disk); -- cgit From efbcbb12ee99f750c9f25c873b55ad774871de2a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 17 Mar 2023 13:51:17 -0700 Subject: media: m5mols: fix off-by-one loop termination error The __find_restype() function loops over the m5mols_default_ffmt[] array, and the termination condition ends up being wrong: instead of stopping when the iterator becomes the size of the array it traverses, it stops after it has already overshot the array. Now, in practice this doesn't likely matter, because the code will always find the entry it looks for, and will thus return early and never hit that last extra iteration. But it turns out that clang will unroll the loop fully, because it has only two iterations (well, three due to the off-by-one bug), and then clang will end up just giving up in the middle of the loop unrolling when it notices that the code walks past the end of the array. And that made 'objtool' very unhappy indeed, because the generated code just falls off the edge of the universe, and ends up falling through to the next function, causing this warning: drivers/media/i2c/m5mols/m5mols.o: warning: objtool: m5mols_set_fmt() falls through to next function m5mols_get_frame_desc() Fix the loop ending condition. Reported-by: Jens Axboe Analyzed-by: Miguel Ojeda Analyzed-by: Nick Desaulniers Link: https://lore.kernel.org/linux-block/CAHk-=wgTSdKYbmB1JYM5vmHMcD9J9UZr0mn7BOYM_LudrP+Xvw@mail.gmail.com/ Fixes: bc125106f8af ("[media] Add support for M-5MOLS 8 Mega Pixel camera ISP") Cc: HeungJun, Kim Cc: Sylwester Nawrocki Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- drivers/media/i2c/m5mols/m5mols_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c index 2b01873ba0db..5c2336f318d9 100644 --- a/drivers/media/i2c/m5mols/m5mols_core.c +++ b/drivers/media/i2c/m5mols/m5mols_core.c @@ -488,7 +488,7 @@ static enum m5mols_restype __find_restype(u32 code) do { if (code == m5mols_default_ffmt[type].code) return type; - } while (type++ != SIZE_DEFAULT_FFMT); + } while (++type != SIZE_DEFAULT_FFMT); return 0; } -- cgit From 43e5f1d5921128373743585e3275ed9044ef8b8f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 13 Mar 2023 15:12:30 -0700 Subject: fscrypt: improve fscrypt_destroy_keyring() documentation Document that fscrypt_destroy_keyring() must be called after all potentially-encrypted inodes have been evicted. Link: https://lore.kernel.org/r/20230313221231.272498-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keyring.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 78086f8dbda5..bb15709ac9a4 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -207,10 +207,11 @@ static int allocate_filesystem_keyring(struct super_block *sb) * Release all encryption keys that have been added to the filesystem, along * with the keyring that contains them. * - * This is called at unmount time. The filesystem's underlying block device(s) - * are still available at this time; this is important because after user file - * accesses have been allowed, this function may need to evict keys from the - * keyslots of an inline crypto engine, which requires the block device(s). + * This is called at unmount time, after all potentially-encrypted inodes have + * been evicted. The filesystem's underlying block device(s) are still + * available at this time; this is important because after user file accesses + * have been allowed, this function may need to evict keys from the keyslots of + * an inline crypto engine, which requires the block device(s). */ void fscrypt_destroy_keyring(struct super_block *sb) { @@ -227,12 +228,12 @@ void fscrypt_destroy_keyring(struct super_block *sb) hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { /* - * Since all inodes were already evicted, every key - * remaining in the keyring should have an empty inode - * list, and should only still be in the keyring due to - * the single active ref associated with ->mk_secret. - * There should be no structural refs beyond the one - * associated with the active ref. + * Since all potentially-encrypted inodes were already + * evicted, every key remaining in the keyring should + * have an empty inode list, and should only still be in + * the keyring due to the single active ref associated + * with ->mk_secret. There should be no structural refs + * beyond the one associated with the active ref. */ WARN_ON(refcount_read(&mk->mk_active_refs) != 1); WARN_ON(refcount_read(&mk->mk_struct_refs) != 1); -- cgit From 4bcf6f827a79c59806c695dc280e763c5b6a6813 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 13 Mar 2023 15:12:31 -0700 Subject: fscrypt: check for NULL keyring in fscrypt_put_master_key_activeref() It is a bug for fscrypt_put_master_key_activeref() to see a NULL keyring. But it used to be possible due to the bug, now fixed, where fscrypt_destroy_keyring() was called before security_sb_delete(). To be consistent with how fscrypt_destroy_keyring() uses WARN_ON for the same issue, WARN and leak the fscrypt_master_key if the keyring is NULL instead of dereferencing the NULL pointer. This is a robustness improvement, not a fix. Link: https://lore.kernel.org/r/20230313221231.272498-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/keyring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index bb15709ac9a4..13d336a6cc5d 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -92,6 +92,8 @@ void fscrypt_put_master_key_activeref(struct super_block *sb, * destroying any subkeys embedded in it. */ + if (WARN_ON(!sb->s_master_keys)) + return; spin_lock(&sb->s_master_keys->lock); hlist_del_rcu(&mk->mk_node); spin_unlock(&sb->s_master_keys->lock); -- cgit From 25143b6a01d0cc5319edd3de22ffa2578b045550 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Thu, 16 Mar 2023 13:29:21 +0300 Subject: qed/qed_sriov: guard against NULL derefs from qed_iov_get_vf_info We have to make sure that the info returned by the helper is valid before using it. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: f990c82c385b ("qed*: Add support for ndo_set_vf_trust") Fixes: 733def6a04bf ("qed*: IOV link control") Signed-off-by: Daniil Tatianin Reviewed-by: Michal Swiatkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 2bf18748581d..fa167b1aa019 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -4404,6 +4404,9 @@ qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) } vf = qed_iov_get_vf_info(QED_LEADING_HWFN(cdev), (u16)vfid, true); + if (!vf) + return -EINVAL; + vport_id = vf->vport_id; return qed_configure_vport_wfq(cdev, vport_id, rate); @@ -5152,7 +5155,7 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) /* Validate that the VF has a configured vport */ vf = qed_iov_get_vf_info(hwfn, i, true); - if (!vf->vport_instance) + if (!vf || !vf->vport_instance) continue; memset(¶ms, 0, sizeof(params)); -- cgit From e8d20c3ded59a092532513c9bd030d1ea66f5f44 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 17 Mar 2023 00:15:26 +0800 Subject: xirc2ps_cs: Fix use after free bug in xirc2ps_detach In xirc2ps_probe, the local->tx_timeout_task was bounded with xirc2ps_tx_timeout_task. When timeout occurs, it will call xirc_tx_timeout->schedule_work to start the work. When we call xirc2ps_detach to remove the driver, there may be a sequence as follows: Stop responding to timeout tasks and complete scheduled tasks before cleanup in xirc2ps_detach, which will fix the problem. CPU0 CPU1 |xirc2ps_tx_timeout_task xirc2ps_detach | free_netdev | kfree(dev); | | | do_reset | //use dev Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zheng Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/xircom/xirc2ps_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index 894e92ef415b..9f505cf02d96 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -503,6 +503,11 @@ static void xirc2ps_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; + struct local_info *local = netdev_priv(dev); + + netif_carrier_off(dev); + netif_tx_disable(dev); + cancel_work_sync(&local->tx_timeout_task); dev_dbg(&link->dev, "detach\n"); -- cgit From 4203d84032e28f893594a453bd8bc9c3b15c7334 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Mar 2023 13:33:24 -0700 Subject: net: phy: Ensure state transitions are processed from phy_stop() In the phy_disconnect() -> phy_stop() path, we will be forcibly setting the PHY state machine to PHY_HALTED. This invalidates the old_state != phydev->state condition in phy_state_machine() such that we will neither display the state change for debugging, nor will we invoke the link_change_notify() callback. Factor the code by introducing phy_process_state_change(), and ensure that we process the state change from phy_stop() as well. Fixes: 5c5f626bcace ("net: phy: improve handling link_change_notify callback") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b33e55a7364e..99a07eb54c44 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -57,6 +57,18 @@ static const char *phy_state_to_str(enum phy_state st) return NULL; } +static void phy_process_state_change(struct phy_device *phydev, + enum phy_state old_state) +{ + if (old_state != phydev->state) { + phydev_dbg(phydev, "PHY state change %s -> %s\n", + phy_state_to_str(old_state), + phy_state_to_str(phydev->state)); + if (phydev->drv && phydev->drv->link_change_notify) + phydev->drv->link_change_notify(phydev); + } +} + static void phy_link_up(struct phy_device *phydev) { phydev->phy_link_change(phydev, true); @@ -1301,6 +1313,7 @@ EXPORT_SYMBOL(phy_free_interrupt); void phy_stop(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; + enum phy_state old_state; if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) { WARN(1, "called from state %s\n", @@ -1309,6 +1322,7 @@ void phy_stop(struct phy_device *phydev) } mutex_lock(&phydev->lock); + old_state = phydev->state; if (phydev->state == PHY_CABLETEST) { phy_abort_cable_test(phydev); @@ -1319,6 +1333,7 @@ void phy_stop(struct phy_device *phydev) sfp_upstream_stop(phydev->sfp_bus); phydev->state = PHY_HALTED; + phy_process_state_change(phydev, old_state); mutex_unlock(&phydev->lock); @@ -1436,13 +1451,7 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); - if (old_state != phydev->state) { - phydev_dbg(phydev, "PHY state change %s -> %s\n", - phy_state_to_str(old_state), - phy_state_to_str(phydev->state)); - if (phydev->drv && phydev->drv->link_change_notify) - phydev->drv->link_change_notify(phydev); - } + phy_process_state_change(phydev, old_state); /* Only re-schedule a PHY state machine change if we are polling the * PHY, if PHY_MAC_INTERRUPT is set, then we will be moving -- cgit From 99669259f3361d759219811e670b7e0742668556 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 16 Mar 2023 16:33:16 -0700 Subject: net: mdio: fix owner field for mdio buses registered using device-tree Bus ownership is wrong when using of_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. Signed-off-by: Maxime Bizon Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") [florian: fix kdoc, added Fixes tag] Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/mdio/of_mdio.c | 12 +++++++----- drivers/net/phy/mdio_devres.c | 11 ++++++----- include/linux/of_mdio.h | 22 +++++++++++++++++++--- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 510822d6d0d9..1e46e39f5f46 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -139,21 +139,23 @@ bool of_mdiobus_child_is_phy(struct device_node *child) EXPORT_SYMBOL(of_mdiobus_child_is_phy); /** - * of_mdiobus_register - Register mii_bus and create PHYs from the device tree + * __of_mdiobus_register - Register mii_bus and create PHYs from the device tree * @mdio: pointer to mii_bus structure * @np: pointer to device_node of MDIO bus. + * @owner: module owning the @mdio object. * * This function registers the mii_bus structure and registers a phy_device * for each child node of @np. */ -int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) +int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np, + struct module *owner) { struct device_node *child; bool scanphys = false; int addr, rc; if (!np) - return mdiobus_register(mdio); + return __mdiobus_register(mdio, owner); /* Do not continue if the node is disabled */ if (!of_device_is_available(np)) @@ -172,7 +174,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) of_property_read_u32(np, "reset-post-delay-us", &mdio->reset_post_delay_us); /* Register the MDIO bus */ - rc = mdiobus_register(mdio); + rc = __mdiobus_register(mdio, owner); if (rc) return rc; @@ -236,7 +238,7 @@ unregister: mdiobus_unregister(mdio); return rc; } -EXPORT_SYMBOL(of_mdiobus_register); +EXPORT_SYMBOL(__of_mdiobus_register); /** * of_mdio_find_device - Given a device tree node, find the mdio_device diff --git a/drivers/net/phy/mdio_devres.c b/drivers/net/phy/mdio_devres.c index b560e99695df..69b829e6ab35 100644 --- a/drivers/net/phy/mdio_devres.c +++ b/drivers/net/phy/mdio_devres.c @@ -98,13 +98,14 @@ EXPORT_SYMBOL(__devm_mdiobus_register); #if IS_ENABLED(CONFIG_OF_MDIO) /** - * devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register() + * __devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register() * @dev: Device to register mii_bus for * @mdio: MII bus structure to register * @np: Device node to parse + * @owner: Owning module */ -int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, - struct device_node *np) +int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np, struct module *owner) { struct mdiobus_devres *dr; int ret; @@ -117,7 +118,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, if (!dr) return -ENOMEM; - ret = of_mdiobus_register(mdio, np); + ret = __of_mdiobus_register(mdio, np, owner); if (ret) { devres_free(dr); return ret; @@ -127,7 +128,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, devres_add(dev, dr); return 0; } -EXPORT_SYMBOL(devm_of_mdiobus_register); +EXPORT_SYMBOL(__devm_of_mdiobus_register); #endif /* CONFIG_OF_MDIO */ MODULE_LICENSE("GPL"); diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index da633d34ab86..8a52ef2e6fa6 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -14,9 +14,25 @@ #if IS_ENABLED(CONFIG_OF_MDIO) bool of_mdiobus_child_is_phy(struct device_node *child); -int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); -int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, - struct device_node *np); +int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np, + struct module *owner); + +static inline int of_mdiobus_register(struct mii_bus *mdio, + struct device_node *np) +{ + return __of_mdiobus_register(mdio, np, THIS_MODULE); +} + +int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np, struct module *owner); + +static inline int devm_of_mdiobus_register(struct device *dev, + struct mii_bus *mdio, + struct device_node *np) +{ + return __devm_of_mdiobus_register(dev, mdio, np, THIS_MODULE); +} + struct mdio_device *of_mdio_find_device(struct device_node *np); struct phy_device *of_phy_find_device(struct device_node *phy_np); struct phy_device * -- cgit From 30b605b8501e321f79e19c3238aa6ca31da6087c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Mar 2023 16:33:17 -0700 Subject: net: mdio: fix owner field for mdio buses registered using ACPI Bus ownership is wrong when using acpi_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. CC: Maxime Bizon Fixes: 803ca24d2f92 ("net: mdio: Add ACPI support code for mdio") Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/mdio/acpi_mdio.c | 10 ++++++---- include/linux/acpi_mdio.h | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/mdio/acpi_mdio.c b/drivers/net/mdio/acpi_mdio.c index d77c987fda9c..4630dde01974 100644 --- a/drivers/net/mdio/acpi_mdio.c +++ b/drivers/net/mdio/acpi_mdio.c @@ -18,16 +18,18 @@ MODULE_AUTHOR("Calvin Johnson "); MODULE_LICENSE("GPL"); /** - * acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. + * __acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. * @mdio: pointer to mii_bus structure * @fwnode: pointer to fwnode of MDIO bus. This fwnode is expected to represent + * @owner: module owning this @mdio object. * an ACPI device object corresponding to the MDIO bus and its children are * expected to correspond to the PHY devices on that bus. * * This function registers the mii_bus structure and registers a phy_device * for each child node of @fwnode. */ -int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) +int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode, + struct module *owner) { struct fwnode_handle *child; u32 addr; @@ -35,7 +37,7 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) /* Mask out all PHYs from auto probing. */ mdio->phy_mask = GENMASK(31, 0); - ret = mdiobus_register(mdio); + ret = __mdiobus_register(mdio, owner); if (ret) return ret; @@ -55,4 +57,4 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) } return 0; } -EXPORT_SYMBOL(acpi_mdiobus_register); +EXPORT_SYMBOL(__acpi_mdiobus_register); diff --git a/include/linux/acpi_mdio.h b/include/linux/acpi_mdio.h index 0a24ab7cb66f..8e2eefa9fbc0 100644 --- a/include/linux/acpi_mdio.h +++ b/include/linux/acpi_mdio.h @@ -9,7 +9,14 @@ #include #if IS_ENABLED(CONFIG_ACPI_MDIO) -int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode); +int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode, + struct module *owner); + +static inline int +acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *handle) +{ + return __acpi_mdiobus_register(mdio, handle, THIS_MODULE); +} #else /* CONFIG_ACPI_MDIO */ static inline int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) -- cgit From 070246e4674b125860d311c18ce2623e73e2bd51 Mon Sep 17 00:00:00 2001 From: Jochen Henneberg Date: Fri, 17 Mar 2023 09:08:17 +0100 Subject: net: stmmac: Fix for mismatched host/device DMA address width Currently DMA address width is either read from a RO device register or force set from the platform data. This breaks DMA when the host DMA address width is <=32it but the device is >32bit. Right now the driver may decide to use a 2nd DMA descriptor for another buffer (happens in case of TSO xmit) assuming that 32bit addressing is used due to platform configuration but the device will still use both descriptor addresses as one address. This can be observed with the Intel EHL platform driver that sets 32bit for addr64 but the MAC reports 40bit. The TX queue gets stuck in case of TCP with iptables NAT configuration on TSO packets. The logic should be like this: Whatever we do on the host side (memory allocation GFP flags) should happen with the host DMA width, whenever we decide how to set addresses on the device registers we must use the device DMA address width. This patch renames the platform address width field from addr64 (term used in device datasheet) to host_addr and uses this value exclusively for host side operations while all chip operations consider the device DMA width as read from the device register. Fixes: 7cfc4486e7ea ("stmmac: intel: Configure EHL PSE0 GbE and PSE1 GbE to 32 bits DMA addressing") Signed-off-by: Jochen Henneberg Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 + drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 +-- .../net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 30 ++++++++++++---------- include/linux/stmmac.h | 2 +- 6 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 6b5d96bced47..ec9c130276d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -418,6 +418,7 @@ struct dma_features { unsigned int frpbs; unsigned int frpes; unsigned int addr64; + unsigned int host_dma_width; unsigned int rssen; unsigned int vlhash; unsigned int sphen; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index ac550d1ac015..2a2be65d65a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -288,7 +288,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) goto err_parse_dt; } - plat_dat->addr64 = dwmac->ops->addr_width; + plat_dat->host_dma_width = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; plat_dat->clks_config = imx_dwmac_clks_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 7deb1f817dac..13aa919633b4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -684,7 +684,7 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, intel_priv->is_pse = true; plat->bus_id = 2; - plat->addr64 = 32; + plat->host_dma_width = 32; plat->clk_ptp_rate = 200000000; @@ -725,7 +725,7 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, intel_priv->is_pse = true; plat->bus_id = 3; - plat->addr64 = 32; + plat->host_dma_width = 32; plat->clk_ptp_rate = 200000000; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 2f7d8e4561d9..9ae31e3dc821 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -591,7 +591,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->use_phy_wol = priv_plat->mac_wol ? 0 : 1; plat->riwt_off = 1; plat->maxmtu = ETH_DATA_LEN; - plat->addr64 = priv_plat->variant->dma_bit_mask; + plat->host_dma_width = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8f543c3ab5c5..17310ade88dd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1431,7 +1431,7 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - if (priv->dma_cap.addr64 <= 32) + if (priv->dma_cap.host_dma_width <= 32) gfp |= GFP_DMA32; if (!buf->page) { @@ -4587,7 +4587,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) unsigned int entry = rx_q->dirty_rx; gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - if (priv->dma_cap.addr64 <= 32) + if (priv->dma_cap.host_dma_width <= 32) gfp |= GFP_DMA32; while (dirty-- > 0) { @@ -6205,7 +6205,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) seq_printf(seq, "\tFlexible RX Parser: %s\n", priv->dma_cap.frpsel ? "Y" : "N"); seq_printf(seq, "\tEnhanced Addressing: %d\n", - priv->dma_cap.addr64); + priv->dma_cap.host_dma_width); seq_printf(seq, "\tReceive Side Scaling: %s\n", priv->dma_cap.rssen ? "Y" : "N"); seq_printf(seq, "\tVLAN Hash Filtering: %s\n", @@ -7178,20 +7178,22 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "SPH feature enabled\n"); } - /* The current IP register MAC_HW_Feature1[ADDR64] only define - * 32/40/64 bit width, but some SOC support others like i.MX8MP - * support 34 bits but it map to 40 bits width in MAC_HW_Feature1[ADDR64]. - * So overwrite dma_cap.addr64 according to HW real design. + /* Ideally our host DMA address width is the same as for the + * device. However, it may differ and then we have to use our + * host DMA width for allocation and the device DMA width for + * register handling. */ - if (priv->plat->addr64) - priv->dma_cap.addr64 = priv->plat->addr64; + if (priv->plat->host_dma_width) + priv->dma_cap.host_dma_width = priv->plat->host_dma_width; + else + priv->dma_cap.host_dma_width = priv->dma_cap.addr64; - if (priv->dma_cap.addr64) { + if (priv->dma_cap.host_dma_width) { ret = dma_set_mask_and_coherent(device, - DMA_BIT_MASK(priv->dma_cap.addr64)); + DMA_BIT_MASK(priv->dma_cap.host_dma_width)); if (!ret) { - dev_info(priv->device, "Using %d bits DMA width\n", - priv->dma_cap.addr64); + dev_info(priv->device, "Using %d/%d bits DMA host/device width\n", + priv->dma_cap.host_dma_width, priv->dma_cap.addr64); /* * If more than 32 bits can be addressed, make sure to @@ -7206,7 +7208,7 @@ int stmmac_dvr_probe(struct device *device, goto error_hw_init; } - priv->dma_cap.addr64 = 32; + priv->dma_cap.host_dma_width = 32; } } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a152678b82b7..a2414c187483 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -215,7 +215,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; - u32 addr64; + u32 host_dma_width; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; -- cgit From 04361b8bb81819efb68bf39c276025e2250ac537 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 17 Mar 2023 07:28:00 +0000 Subject: net: sfp: fix state loss when updating state_hw_mask Andrew reports that the SFF modules on one of the ZII platforms do not indicate link up due to the SFP code believing that LOS indicating that there is no signal being received from the remote end, but in fact the LOS signal is showing that there is signal. What makes SFF modules different from SFPs is they typically have an inverted LOS, which uncovered this issue. When we read the hardware state, we mask it with state_hw_mask so we ignore anything we're not interested in. However, we don't re-read when state_hw_mask changes, leading to sfp->state being stale. Arrange for a software poll of the module state after we have parsed the EEPROM in sfp_sm_mod_probe() and updated state_*_mask. This will generate any necessary events for signal changes for the state machine as well as updating sfp->state. Reported-by: Andrew Lunn Tested-by: Andrew Lunn Fixes: 8475c4b70b04 ("net: sfp: re-implement soft state polling setup") Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c02cad6478a8..fb98db61e06c 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2190,6 +2190,11 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) break; } + /* Force a poll to re-read the hardware signal state after + * sfp_sm_mod_probe() changed state_hw_mask. + */ + mod_delayed_work(system_wq, &sfp->poll, 1); + err = sfp_hwmon_insert(sfp); if (err) dev_warn(sfp->dev, "hwmon probe failed: %pe\n", -- cgit From 6d206b1ea9f48433a96edec7028586db1d947911 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 17 Mar 2023 16:32:59 +0100 Subject: mlxsw: core_thermal: Fix fan speed in maximum cooling state The cooling levels array is supposed to prevent the system fans from being configured below a 20% duty cycle as otherwise some of them get stuck at 0 RPM. Due to an off-by-one error, the last element in the array was not initialized, causing it to be set to zero, which in turn lead to fans being configured with a 0% duty cycle in maximum cooling state. Since commit 332fdf951df8 ("mlxsw: thermal: Fix out-of-bounds memory accesses") the contents of the array are static. Therefore, instead of fixing the initialization of the array, simply remove it and adjust thermal_cooling_device_ops::set_cur_state() so that the configured duty cycle is never set below 20%. Before: # cat /sys/class/thermal/thermal_zone0/cdev0/type mlxsw_fan # echo 10 > /sys/class/thermal/thermal_zone0/cdev0/cur_state # cat /sys/class/hwmon/hwmon0/name mlxsw # cat /sys/class/hwmon/hwmon0/pwm1 0 After: # cat /sys/class/thermal/thermal_zone0/cdev0/type mlxsw_fan # echo 10 > /sys/class/thermal/thermal_zone0/cdev0/cur_state # cat /sys/class/hwmon/hwmon0/name mlxsw # cat /sys/class/hwmon/hwmon0/pwm1 255 This bug was uncovered when the thermal subsystem repeatedly tried to configure the cooling devices to their maximum state due to another issue [1]. This resulted in the fans being stuck at 0 RPM, which eventually lead to the system undergoing thermal shutdown. [1] https://lore.kernel.org/netdev/ZA3CFNhU4AbtsP4G@shredder/ Fixes: a421ce088ac8 ("mlxsw: core: Extend cooling device with cooling levels") Signed-off-by: Ido Schimmel Reviewed-by: Vadim Pasternak Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index c5240d38c9db..09ed6e5fa6c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -105,7 +105,6 @@ struct mlxsw_thermal { struct thermal_zone_device *tzdev; int polling_delay; struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; - u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; struct mlxsw_cooling_states cooling_states[MLXSW_THERMAL_NUM_TRIPS]; struct mlxsw_thermal_area line_cards[]; @@ -468,7 +467,7 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, return idx; /* Normalize the state to the valid speed range. */ - state = thermal->cooling_levels[state]; + state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state); mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { @@ -859,10 +858,6 @@ int mlxsw_thermal_init(struct mlxsw_core *core, } } - /* Initialize cooling levels per PWM state. */ - for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i); - thermal->polling_delay = bus_info->low_frequency ? MLXSW_THERMAL_SLOW_POLL_INT : MLXSW_THERMAL_POLL_INT; -- cgit From bc4f359b3b607daac0290d0038561237a86b38cb Mon Sep 17 00:00:00 2001 From: Anton Gusev Date: Tue, 31 Jan 2023 10:58:18 +0300 Subject: tracing: Fix wrong return in kprobe_event_gen_test.c Overwriting the error code with the deletion result may cause the function to return 0 despite encountering an error. Commit b111545d26c0 ("tracing: Remove the useless value assignment in test_create_synth_event()") solves a similar issue by returning the original error code, so this patch does the same. Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://lore.kernel.org/linux-trace-kernel/20230131075818.5322-1-aagusev@ispras.ru Signed-off-by: Anton Gusev Reviewed-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/kprobe_event_gen_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index 4850fdfe27f1..5a4b722b5045 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -146,7 +146,7 @@ static int __init test_gen_kprobe_cmd(void) if (trace_event_file_is_valid(gen_kprobe_test)) gen_kprobe_test = NULL; /* We got an error after creating the event, delete it */ - ret = kprobe_event_delete("gen_kprobe_test"); + kprobe_event_delete("gen_kprobe_test"); goto out; } @@ -211,7 +211,7 @@ static int __init test_gen_kretprobe_cmd(void) if (trace_event_file_is_valid(gen_kretprobe_test)) gen_kretprobe_test = NULL; /* We got an error after creating the event, delete it */ - ret = kprobe_event_delete("gen_kretprobe_test"); + kprobe_event_delete("gen_kretprobe_test"); goto out; } -- cgit From 7a025e066e0f0afd39cc88a089929ccb945ce9e8 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 9 Mar 2023 10:04:14 -0500 Subject: tracing/osnoise: set several trace_osnoise.c variables storage-class-specifier to static smatch reports several similar warnings kernel/trace/trace_osnoise.c:220:1: warning: symbol '__pcpu_scope_per_cpu_osnoise_var' was not declared. Should it be static? kernel/trace/trace_osnoise.c:243:1: warning: symbol '__pcpu_scope_per_cpu_timerlat_var' was not declared. Should it be static? kernel/trace/trace_osnoise.c:335:14: warning: symbol 'interface_lock' was not declared. Should it be static? kernel/trace/trace_osnoise.c:2242:5: warning: symbol 'timerlat_min_period' was not declared. Should it be static? kernel/trace/trace_osnoise.c:2243:5: warning: symbol 'timerlat_max_period' was not declared. Should it be static? These variables are only used in trace_osnoise.c, so it should be static Link: https://lore.kernel.org/linux-trace-kernel/20230309150414.4036764-1-trix@redhat.com Signed-off-by: Tom Rix Acked-by: Masami Hiramatsu (Google) Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 04f0fdae19a1..9176bb7a9bb4 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -217,7 +217,7 @@ struct osnoise_variables { /* * Per-cpu runtime information. */ -DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); +static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); /* * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU @@ -240,7 +240,7 @@ struct timerlat_variables { u64 count; }; -DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); +static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); /* * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU @@ -332,7 +332,7 @@ struct timerlat_sample { /* * Protect the interface. */ -struct mutex interface_lock; +static struct mutex interface_lock; /* * Tracer data. @@ -2239,8 +2239,8 @@ static struct trace_min_max_param osnoise_print_stack = { /* * osnoise/timerlat_period: min 100 us, max 1 s */ -u64 timerlat_min_period = 100; -u64 timerlat_max_period = 1000000; +static u64 timerlat_min_period = 100; +static u64 timerlat_max_period = 1000000; static struct trace_min_max_param timerlat_period = { .lock = &interface_lock, .val = &osnoise_data.timerlat_period, -- cgit From 4c42f5f0d1dd20bddd9f940beb1e6ccad60c4498 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 10 Mar 2023 12:04:50 +0200 Subject: trace/hwlat: Do not wipe the contents of per-cpu thread data Do not wipe the contents of the per-cpu kthread data when starting the tracer, as this will completely forget about already running instances and can later start new additional per-cpu threads. Link: https://lore.kernel.org/all/20230302113654.2984709-1-tero.kristo@linux.intel.com/ Link: https://lkml.kernel.org/r/20230310100451.3948583-2-tero.kristo@linux.intel.com Cc: stable@vger.kernel.org Fixes: f46b16520a087 ("trace/hwlat: Implement the per-cpu mode") Signed-off-by: Tero Kristo Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_hwlat.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index d440ddd5fd8b..edc26dc22c3f 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -584,9 +584,6 @@ static int start_per_cpu_kthreads(struct trace_array *tr) */ cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); - for_each_online_cpu(cpu) - per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL; - for_each_cpu(cpu, current_mask) { retval = start_cpu_kthread(cpu); if (retval) -- cgit From 08697bca9bbba15f2058fdbd9f970bd5f6a8a2e8 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 10 Mar 2023 12:04:51 +0200 Subject: trace/hwlat: Do not start per-cpu thread if it is already running The hwlatd tracer will end up starting multiple per-cpu threads with the following script: #!/bin/sh cd /sys/kernel/debug/tracing echo 0 > tracing_on echo hwlat > current_tracer echo per-cpu > hwlat_detector/mode echo 100000 > hwlat_detector/width echo 200000 > hwlat_detector/window echo 1 > tracing_on To fix the issue, check if the hwlatd thread for the cpu is already running, before starting a new one. Along with the previous patch, this avoids running multiple instances of the same CPU thread on the system. Link: https://lore.kernel.org/all/20230302113654.2984709-1-tero.kristo@linux.intel.com/ Link: https://lkml.kernel.org/r/20230310100451.3948583-3-tero.kristo@linux.intel.com Cc: stable@vger.kernel.org Fixes: f46b16520a087 ("trace/hwlat: Implement the per-cpu mode") Signed-off-by: Tero Kristo Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_hwlat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index edc26dc22c3f..c4945f8adc11 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -492,6 +492,10 @@ static int start_cpu_kthread(unsigned int cpu) { struct task_struct *kthread; + /* Do not start a new hwlatd thread if it is already running */ + if (per_cpu(hwlat_per_cpu_data, cpu).kthread) + return 0; + kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u"); if (IS_ERR(kthread)) { pr_err(BANNER "could not start sampling thread\n"); -- cgit From 8732565549011cabbea08329a1aefd78a68d96c7 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 11 Mar 2023 08:51:13 -0500 Subject: ftrace: Set direct_ops storage-class-specifier to static smatch reports this warning kernel/trace/ftrace.c:2594:19: warning: symbol 'direct_ops' was not declared. Should it be static? The variable direct_ops is only used in ftrace.c, so it should be static Link: https://lore.kernel.org/linux-trace-kernel/20230311135113.711824-1-trix@redhat.com Signed-off-by: Tom Rix Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a47f7d93e32d..ec2897a76004 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2503,7 +2503,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip, arch_ftrace_set_direct_caller(fregs, addr); } -struct ftrace_ops direct_ops = { +static struct ftrace_ops direct_ops = { .func = call_direct_funcs, .flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_PERMANENT, -- cgit From 9eb775968b68d049fb3b00353f12cd10308527c7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Mar 2023 17:30:33 -0700 Subject: xfs: walk all AGs if TRYLOCK passed to xfs_alloc_vextent_iterate_ags Callers of xfs_alloc_vextent_iterate_ags that pass in the TRYLOCK flag want us to perform a non-blocking scan of the AGs for free space. There are no ordering constraints for non-blocking AGF lock acquisition, so the scan can freely start over at AG 0 even when minimum_agno > 0. This manifests fairly reliably on xfs/294 on 6.3-rc2 with the parent pointer patchset applied and the realtime volume enabled. I observed the following sequence as part of an xfs_dir_createname call: 0. Fragment the free space, then allocate nearly all the free space in all AGs except AG 0. 1. Create a directory in AG 2 and let it grow for a while. 2. Try to allocate 2 blocks to expand the dirent part of a directory. The space will be allocated out of AG 0, but the allocation will not be contiguous. This (I think) activates the LOWMODE allocator. 3. The bmapi call decides to convert from extents to bmbt format and tries to allocate 1 block. This allocation request calls xfs_alloc_vextent_start_ag with the inode number, which starts the scan at AG 2. We ignore AG 0 (with all its free space) and instead scrape AG 2 and 3 for more space. We find one block, but this now kicks t_highest_agno to 3. 4. The createname call decides it needs to split the dabtree. It tries to allocate even more space with xfs_alloc_vextent_start_ag, but now we're constrained to AG 3, and we don't find the space. The createname returns ENOSPC and the filesystem shuts down. This change fixes the problem by making the trylock scan wrap around to AG 0 if it doesn't like the AGs that it finds. Since the current transaction itself holds AGF 0, the trylock of AGF 0 will succeed, and we take space from the AG that has plenty. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 8999e38e1bed..bd7112d430b6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3326,11 +3326,14 @@ xfs_alloc_vextent_iterate_ags( uint32_t flags) { struct xfs_mount *mp = args->mp; + xfs_agnumber_t restart_agno = minimum_agno; xfs_agnumber_t agno; int error = 0; + if (flags & XFS_ALLOC_FLAG_TRYLOCK) + restart_agno = 0; restart: - for_each_perag_wrap_range(mp, start_agno, minimum_agno, + for_each_perag_wrap_range(mp, start_agno, restart_agno, mp->m_sb.sb_agcount, agno, args->pag) { args->agno = agno; error = xfs_alloc_vextent_prepare_ag(args); @@ -3369,6 +3372,7 @@ restart: */ if (flags) { flags = 0; + restart_agno = minimum_agno; goto restart; } -- cgit From e6fbb7167ed005783ac5aef3e75699f45ffe2af8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Mar 2023 17:30:33 -0700 Subject: xfs: add tracepoints for each of the externally visible allocators There are now five separate space allocator interfaces exposed to the rest of XFS for five different strategies to find space. Add tracepoints for each of them so that I can tell from a trace dump exactly which ones got called and what happened underneath them. Add a sixth so it's more obvious if an allocation actually happened. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 17 +++++++++++++++++ fs/xfs/xfs_trace.h | 7 +++++++ 2 files changed, 24 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index bd7112d430b6..55ae08a6144c 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3255,6 +3255,8 @@ xfs_alloc_vextent_finish( XFS_STATS_INC(mp, xs_allocx); XFS_STATS_ADD(mp, xs_allocb, args->len); + trace_xfs_alloc_vextent_finish(args); + out_drop_perag: if (drop_perag && args->pag) { xfs_perag_rele(args->pag); @@ -3284,6 +3286,9 @@ xfs_alloc_vextent_this_ag( args->agno = agno; args->agbno = 0; + + trace_xfs_alloc_vextent_this_ag(args); + error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0), &minimum_agno); if (error) { @@ -3405,6 +3410,9 @@ xfs_alloc_vextent_start_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; + + trace_xfs_alloc_vextent_first_ag(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) @@ -3455,6 +3463,9 @@ xfs_alloc_vextent_first_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; + + trace_xfs_alloc_vextent_start_ag(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) @@ -3486,6 +3497,9 @@ xfs_alloc_vextent_exact_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); + + trace_xfs_alloc_vextent_near_bno(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) @@ -3521,6 +3535,9 @@ xfs_alloc_vextent_near_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); + + trace_xfs_alloc_vextent_exact_bno(args); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { if (error == -ENOSPC) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7dc0fd6a6504..9c0006c55fec 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1883,6 +1883,13 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_this_ag); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_start_ag); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_first_ag); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_exact_bno); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_near_bno); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_finish); + TRACE_EVENT(xfs_alloc_cur_check, TP_PROTO(struct xfs_mount *mp, xfs_btnum_t btnum, xfs_agblock_t bno, xfs_extlen_t len, xfs_extlen_t diff, bool new), -- cgit From 3cfb9290da3d87a5877b03bda96c3d5d3ed9fcb0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 16 Mar 2023 09:31:20 -0700 Subject: xfs: test dir/attr hash when loading module Back in the 6.2-rc1 days, Eric Whitney reported a fstests regression in ext4 against generic/454. The cause of this test failure was the unfortunate combination of setting an xattr name containing UTF8 encoded emoji, an xattr hash function that accepted a char pointer with no explicit signedness, signed type extension of those chars to an int, and the 6.2 build tools maintainers deciding to mandate -funsigned-char across the board. As a result, the ondisk extended attribute structure written out by 6.1 and 6.2 were not the same. This discrepancy, in fact, had been noticeable if a filesystem with such an xattr were moved between any two architectures that don't employ the same signedness of a raw "char" declaration. The only reason anyone noticed is that x86 gcc defaults to signed, and no such -funsigned-char update was made to e2fsprogs, so e2fsck immediately started reporting data corruption. After a day and a half of discussing how to handle this use case (xattrs with bit 7 set anywhere in the name) without breaking existing users, Linus merged his own patch and didn't tell the maintainer. None of the ext4 developers realized this until AUTOSEL announced that the commit had been backported to stable. In the end, this problem could have been detected much earlier if there had been any useful tests of hash function(s) in use inside ext4 to make sure that they always produce the same outputs given the same inputs. The XFS dirent/xattr name hash takes a uint8_t*, so I don't think it's vulnerable to this problem. However, let's avoid all this drama by adding our own self test to check that the da hash produces the same outputs for a static pile of inputs on various platforms. This enables us to fix any breakage that may result in a controlled fashion. The buffer and test data are identical to the patches submitted to xfsprogs. Link: https://lore.kernel.org/linux-ext4/Y8bpkm3jA3bDm3eL@debian-BULLSEYE-live-builder-AMD64/ Link: https://lore.kernel.org/linux-xfs/ZBUKCRR7xvIqPrpX@destitution/T/#md38272cc684e2c0d61494435ccbb91f022e8dee4 Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/Makefile | 1 + fs/xfs/xfs_dahash_test.c | 662 +++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_dahash_test.h | 12 + fs/xfs/xfs_super.c | 5 + 4 files changed, 680 insertions(+) create mode 100644 fs/xfs/xfs_dahash_test.c create mode 100644 fs/xfs/xfs_dahash_test.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 03135a1c31b6..92d88dc3c9f7 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -63,6 +63,7 @@ xfs-y += xfs_aops.o \ xfs_bmap_util.o \ xfs_bio_io.o \ xfs_buf.o \ + xfs_dahash_test.o \ xfs_dir2_readdir.o \ xfs_discard.o \ xfs_error.o \ diff --git a/fs/xfs/xfs_dahash_test.c b/fs/xfs/xfs_dahash_test.c new file mode 100644 index 000000000000..230651ab5ce4 --- /dev/null +++ b/fs/xfs/xfs_dahash_test.c @@ -0,0 +1,662 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dahash_test.h" + +/* 4096 random bytes */ +static uint8_t __initdata __attribute__((__aligned__(8))) test_buf[] = +{ + 0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30, + 0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4, + 0xc9, 0x6e, 0x8b, 0xdb, 0x98, 0x6b, 0xaa, 0x60, + 0xa8, 0xb5, 0xbc, 0x6c, 0xa9, 0xb1, 0x5b, 0x2c, + 0xea, 0xb4, 0x92, 0x6a, 0x3f, 0x79, 0x91, 0xe4, + 0xe9, 0x70, 0x51, 0x8c, 0x7f, 0x95, 0x6f, 0x1a, + 0x56, 0xa1, 0x5c, 0x27, 0x03, 0x67, 0x9f, 0x3a, + 0xe2, 0x31, 0x11, 0x29, 0x6b, 0x98, 0xfc, 0xc4, + 0x53, 0x24, 0xc5, 0x8b, 0xce, 0x47, 0xb2, 0xb9, + 0x32, 0xcb, 0xc1, 0xd0, 0x03, 0x57, 0x4e, 0xd4, + 0xe9, 0x3c, 0xa1, 0x63, 0xcf, 0x12, 0x0e, 0xca, + 0xe1, 0x13, 0xd1, 0x93, 0xa6, 0x88, 0x5c, 0x61, + 0x5b, 0xbb, 0xf0, 0x19, 0x46, 0xb4, 0xcf, 0x9e, + 0xb6, 0x6b, 0x4c, 0x3a, 0xcf, 0x60, 0xf9, 0x7a, + 0x8d, 0x07, 0x63, 0xdb, 0x40, 0xe9, 0x0b, 0x6f, + 0xad, 0x97, 0xf1, 0xed, 0xd0, 0x1e, 0x26, 0xfd, + 0xbf, 0xb7, 0xc8, 0x04, 0x94, 0xf8, 0x8b, 0x8c, + 0xf1, 0xab, 0x7a, 0xd4, 0xdd, 0xf3, 0xe8, 0x88, + 0xc3, 0xed, 0x17, 0x8a, 0x9b, 0x40, 0x0d, 0x53, + 0x62, 0x12, 0x03, 0x5f, 0x1b, 0x35, 0x32, 0x1f, + 0xb4, 0x7b, 0x93, 0x78, 0x0d, 0xdb, 0xce, 0xa4, + 0xc0, 0x47, 0xd5, 0xbf, 0x68, 0xe8, 0x5d, 0x74, + 0x8f, 0x8e, 0x75, 0x1c, 0xb2, 0x4f, 0x9a, 0x60, + 0xd1, 0xbe, 0x10, 0xf4, 0x5c, 0xa1, 0x53, 0x09, + 0xa5, 0xe0, 0x09, 0x54, 0x85, 0x5c, 0xdc, 0x07, + 0xe7, 0x21, 0x69, 0x7b, 0x8a, 0xfd, 0x90, 0xf1, + 0x22, 0xd0, 0xb4, 0x36, 0x28, 0xe6, 0xb8, 0x0f, + 0x39, 0xde, 0xc8, 0xf3, 0x86, 0x60, 0x34, 0xd2, + 0x5e, 0xdf, 0xfd, 0xcf, 0x0f, 0xa9, 0x65, 0xf0, + 0xd5, 0x4d, 0x96, 0x40, 0xe3, 0xdf, 0x3f, 0x95, + 0x5a, 0x39, 0x19, 0x93, 0xf4, 0x75, 0xce, 0x22, + 0x00, 0x1c, 0x93, 0xe2, 0x03, 0x66, 0xf4, 0x93, + 0x73, 0x86, 0x81, 0x8e, 0x29, 0x44, 0x48, 0x86, + 0x61, 0x7c, 0x48, 0xa3, 0x43, 0xd2, 0x9c, 0x8d, + 0xd4, 0x95, 0xdd, 0xe1, 0x22, 0x89, 0x3a, 0x40, + 0x4c, 0x1b, 0x8a, 0x04, 0xa8, 0x09, 0x69, 0x8b, + 0xea, 0xc6, 0x55, 0x8e, 0x57, 0xe6, 0x64, 0x35, + 0xf0, 0xc7, 0x16, 0x9f, 0x5d, 0x5e, 0x86, 0x40, + 0x46, 0xbb, 0xe5, 0x45, 0x88, 0xfe, 0xc9, 0x63, + 0x15, 0xfb, 0xf5, 0xbd, 0x71, 0x61, 0xeb, 0x7b, + 0x78, 0x70, 0x07, 0x31, 0x03, 0x9f, 0xb2, 0xc8, + 0xa7, 0xab, 0x47, 0xfd, 0xdf, 0xa0, 0x78, 0x72, + 0xa4, 0x2a, 0xe4, 0xb6, 0xba, 0xc0, 0x1e, 0x86, + 0x71, 0xe6, 0x3d, 0x18, 0x37, 0x70, 0xe6, 0xff, + 0xe0, 0xbc, 0x0b, 0x22, 0xa0, 0x1f, 0xd3, 0xed, + 0xa2, 0x55, 0x39, 0xab, 0xa8, 0x13, 0x73, 0x7c, + 0x3f, 0xb2, 0xd6, 0x19, 0xac, 0xff, 0x99, 0xed, + 0xe8, 0xe6, 0xa6, 0x22, 0xe3, 0x9c, 0xf1, 0x30, + 0xdc, 0x01, 0x0a, 0x56, 0xfa, 0xe4, 0xc9, 0x99, + 0xdd, 0xa8, 0xd8, 0xda, 0x35, 0x51, 0x73, 0xb4, + 0x40, 0x86, 0x85, 0xdb, 0x5c, 0xd5, 0x85, 0x80, + 0x14, 0x9c, 0xfd, 0x98, 0xa9, 0x82, 0xc5, 0x37, + 0xff, 0x32, 0x5d, 0xd0, 0x0b, 0xfa, 0xdc, 0x04, + 0x5e, 0x09, 0xd2, 0xca, 0x17, 0x4b, 0x1a, 0x8e, + 0x15, 0xe1, 0xcc, 0x4e, 0x52, 0x88, 0x35, 0xbd, + 0x48, 0xfe, 0x15, 0xa0, 0x91, 0xfd, 0x7e, 0x6c, + 0x0e, 0x5d, 0x79, 0x1b, 0x81, 0x79, 0xd2, 0x09, + 0x34, 0x70, 0x3d, 0x81, 0xec, 0xf6, 0x24, 0xbb, + 0xfb, 0xf1, 0x7b, 0xdf, 0x54, 0xea, 0x80, 0x9b, + 0xc7, 0x99, 0x9e, 0xbd, 0x16, 0x78, 0x12, 0x53, + 0x5e, 0x01, 0xa7, 0x4e, 0xbd, 0x67, 0xe1, 0x9b, + 0x4c, 0x0e, 0x61, 0x45, 0x97, 0xd2, 0xf0, 0x0f, + 0xfe, 0x15, 0x08, 0xb7, 0x11, 0x4c, 0xe7, 0xff, + 0x81, 0x53, 0xff, 0x91, 0x25, 0x38, 0x7e, 0x40, + 0x94, 0xe5, 0xe0, 0xad, 0xe6, 0xd9, 0x79, 0xb6, + 0x92, 0xc9, 0xfc, 0xde, 0xc3, 0x1a, 0x23, 0xbb, + 0xdd, 0xc8, 0x51, 0x0c, 0x3a, 0x72, 0xfa, 0x73, + 0x6f, 0xb7, 0xee, 0x61, 0x39, 0x03, 0x01, 0x3f, + 0x7f, 0x94, 0x2e, 0x2e, 0xba, 0x3a, 0xbb, 0xb4, + 0xfa, 0x6a, 0x17, 0xfe, 0xea, 0xef, 0x5e, 0x66, + 0x97, 0x3f, 0x32, 0x3d, 0xd7, 0x3e, 0xb1, 0xf1, + 0x6c, 0x14, 0x4c, 0xfd, 0x37, 0xd3, 0x38, 0x80, + 0xfb, 0xde, 0xa6, 0x24, 0x1e, 0xc8, 0xca, 0x7f, + 0x3a, 0x93, 0xd8, 0x8b, 0x18, 0x13, 0xb2, 0xe5, + 0xe4, 0x93, 0x05, 0x53, 0x4f, 0x84, 0x66, 0xa7, + 0x58, 0x5c, 0x7b, 0x86, 0x52, 0x6d, 0x0d, 0xce, + 0xa4, 0x30, 0x7d, 0xb6, 0x18, 0x9f, 0xeb, 0xff, + 0x22, 0xbb, 0x72, 0x29, 0xb9, 0x44, 0x0b, 0x48, + 0x1e, 0x84, 0x71, 0x81, 0xe3, 0x6d, 0x73, 0x26, + 0x92, 0xb4, 0x4d, 0x2a, 0x29, 0xb8, 0x1f, 0x72, + 0xed, 0xd0, 0xe1, 0x64, 0x77, 0xea, 0x8e, 0x88, + 0x0f, 0xef, 0x3f, 0xb1, 0x3b, 0xad, 0xf9, 0xc9, + 0x8b, 0xd0, 0xac, 0xc6, 0xcc, 0xa9, 0x40, 0xcc, + 0x76, 0xf6, 0x3b, 0x53, 0xb5, 0x88, 0xcb, 0xc8, + 0x37, 0xf1, 0xa2, 0xba, 0x23, 0x15, 0x99, 0x09, + 0xcc, 0xe7, 0x7a, 0x3b, 0x37, 0xf7, 0x58, 0xc8, + 0x46, 0x8c, 0x2b, 0x2f, 0x4e, 0x0e, 0xa6, 0x5c, + 0xea, 0x85, 0x55, 0xba, 0x02, 0x0e, 0x0e, 0x48, + 0xbc, 0xe1, 0xb1, 0x01, 0x35, 0x79, 0x13, 0x3d, + 0x1b, 0xc0, 0x53, 0x68, 0x11, 0xe7, 0x95, 0x0f, + 0x9d, 0x3f, 0x4c, 0x47, 0x7b, 0x4d, 0x1c, 0xae, + 0x50, 0x9b, 0xcb, 0xdd, 0x05, 0x8d, 0x9a, 0x97, + 0xfd, 0x8c, 0xef, 0x0c, 0x1d, 0x67, 0x73, 0xa8, + 0x28, 0x36, 0xd5, 0xb6, 0x92, 0x33, 0x40, 0x75, + 0x0b, 0x51, 0xc3, 0x64, 0xba, 0x1d, 0xc2, 0xcc, + 0xee, 0x7d, 0x54, 0x0f, 0x27, 0x69, 0xa7, 0x27, + 0x63, 0x30, 0x29, 0xd9, 0xc8, 0x84, 0xd8, 0xdf, + 0x9f, 0x68, 0x8d, 0x04, 0xca, 0xa6, 0xc5, 0xc7, + 0x7a, 0x5c, 0xc8, 0xd1, 0xcb, 0x4a, 0xec, 0xd0, + 0xd8, 0x20, 0x69, 0xc5, 0x17, 0xcd, 0x78, 0xc8, + 0x75, 0x23, 0x30, 0x69, 0xc9, 0xd4, 0xea, 0x5c, + 0x4f, 0x6b, 0x86, 0x3f, 0x8b, 0xfe, 0xee, 0x44, + 0xc9, 0x7c, 0xb7, 0xdd, 0x3e, 0xe5, 0xec, 0x54, + 0x03, 0x3e, 0xaa, 0x82, 0xc6, 0xdf, 0xb2, 0x38, + 0x0e, 0x5d, 0xb3, 0x88, 0xd9, 0xd3, 0x69, 0x5f, + 0x8f, 0x70, 0x8a, 0x7e, 0x11, 0xd9, 0x1e, 0x7b, + 0x38, 0xf1, 0x42, 0x1a, 0xc0, 0x35, 0xf5, 0xc7, + 0x36, 0x85, 0xf5, 0xf7, 0xb8, 0x7e, 0xc7, 0xef, + 0x18, 0xf1, 0x63, 0xd6, 0x7a, 0xc6, 0xc9, 0x0e, + 0x4d, 0x69, 0x4f, 0x84, 0xef, 0x26, 0x41, 0x0c, + 0xec, 0xc7, 0xe0, 0x7e, 0x3c, 0x67, 0x01, 0x4c, + 0x62, 0x1a, 0x20, 0x6f, 0xee, 0x47, 0x4d, 0xc0, + 0x99, 0x13, 0x8d, 0x91, 0x4a, 0x26, 0xd4, 0x37, + 0x28, 0x90, 0x58, 0x75, 0x66, 0x2b, 0x0a, 0xdf, + 0xda, 0xee, 0x92, 0x25, 0x90, 0x62, 0x39, 0x9e, + 0x44, 0x98, 0xad, 0xc1, 0x88, 0xed, 0xe4, 0xb4, + 0xaf, 0xf5, 0x8c, 0x9b, 0x48, 0x4d, 0x56, 0x60, + 0x97, 0x0f, 0x61, 0x59, 0x9e, 0xa6, 0x27, 0xfe, + 0xc1, 0x91, 0x15, 0x38, 0xb8, 0x0f, 0xae, 0x61, + 0x7d, 0x26, 0x13, 0x5a, 0x73, 0xff, 0x1c, 0xa3, + 0x61, 0x04, 0x58, 0x48, 0x55, 0x44, 0x11, 0xfe, + 0x15, 0xca, 0xc3, 0xbd, 0xca, 0xc5, 0xb4, 0x40, + 0x5d, 0x1b, 0x7f, 0x39, 0xb5, 0x9c, 0x35, 0xec, + 0x61, 0x15, 0x32, 0x32, 0xb8, 0x4e, 0x40, 0x9f, + 0x17, 0x1f, 0x0a, 0x4d, 0xa9, 0x91, 0xef, 0xb7, + 0xb0, 0xeb, 0xc2, 0x83, 0x9a, 0x6c, 0xd2, 0x79, + 0x43, 0x78, 0x5e, 0x2f, 0xe5, 0xdd, 0x1a, 0x3c, + 0x45, 0xab, 0x29, 0x40, 0x3a, 0x37, 0x5b, 0x6f, + 0xd7, 0xfc, 0x48, 0x64, 0x3c, 0x49, 0xfb, 0x21, + 0xbe, 0xc3, 0xff, 0x07, 0xfb, 0x17, 0xe9, 0xc9, + 0x0c, 0x4c, 0x5c, 0x15, 0x9e, 0x8e, 0x22, 0x30, + 0x0a, 0xde, 0x48, 0x7f, 0xdb, 0x0d, 0xd1, 0x2b, + 0x87, 0x38, 0x9e, 0xcc, 0x5a, 0x01, 0x16, 0xee, + 0x75, 0x49, 0x0d, 0x30, 0x01, 0x34, 0x6a, 0xb6, + 0x9a, 0x5a, 0x2a, 0xec, 0xbb, 0x48, 0xac, 0xd3, + 0x77, 0x83, 0xd8, 0x08, 0x86, 0x4f, 0x48, 0x09, + 0x29, 0x41, 0x79, 0xa1, 0x03, 0x12, 0xc4, 0xcd, + 0x90, 0x55, 0x47, 0x66, 0x74, 0x9a, 0xcc, 0x4f, + 0x35, 0x8c, 0xd6, 0x98, 0xef, 0xeb, 0x45, 0xb9, + 0x9a, 0x26, 0x2f, 0x39, 0xa5, 0x70, 0x6d, 0xfc, + 0xb4, 0x51, 0xee, 0xf4, 0x9c, 0xe7, 0x38, 0x59, + 0xad, 0xf4, 0xbc, 0x46, 0xff, 0x46, 0x8e, 0x60, + 0x9c, 0xa3, 0x60, 0x1d, 0xf8, 0x26, 0x72, 0xf5, + 0x72, 0x9d, 0x68, 0x80, 0x04, 0xf6, 0x0b, 0xa1, + 0x0a, 0xd5, 0xa7, 0x82, 0x3a, 0x3e, 0x47, 0xa8, + 0x5a, 0xde, 0x59, 0x4f, 0x7b, 0x07, 0xb3, 0xe9, + 0x24, 0x19, 0x3d, 0x34, 0x05, 0xec, 0xf1, 0xab, + 0x6e, 0x64, 0x8f, 0xd3, 0xe6, 0x41, 0x86, 0x80, + 0x70, 0xe3, 0x8d, 0x60, 0x9c, 0x34, 0x25, 0x01, + 0x07, 0x4d, 0x19, 0x41, 0x4e, 0x3d, 0x5c, 0x7e, + 0xa8, 0xf5, 0xcc, 0xd5, 0x7b, 0xe2, 0x7d, 0x3d, + 0x49, 0x86, 0x7d, 0x07, 0xb7, 0x10, 0xe3, 0x35, + 0xb8, 0x84, 0x6d, 0x76, 0xab, 0x17, 0xc6, 0x38, + 0xb4, 0xd3, 0x28, 0x57, 0xad, 0xd3, 0x88, 0x5a, + 0xda, 0xea, 0xc8, 0x94, 0xcc, 0x37, 0x19, 0xac, + 0x9c, 0x9f, 0x4b, 0x00, 0x15, 0xc0, 0xc8, 0xca, + 0x1f, 0x15, 0xaa, 0xe0, 0xdb, 0xf9, 0x2f, 0x57, + 0x1b, 0x24, 0xc7, 0x6f, 0x76, 0x29, 0xfb, 0xed, + 0x25, 0x0d, 0xc0, 0xfe, 0xbd, 0x5a, 0xbf, 0x20, + 0x08, 0x51, 0x05, 0xec, 0x71, 0xa3, 0xbf, 0xef, + 0x5e, 0x99, 0x75, 0xdb, 0x3c, 0x5f, 0x9a, 0x8c, + 0xbb, 0x19, 0x5c, 0x0e, 0x93, 0x19, 0xf8, 0x6a, + 0xbc, 0xf2, 0x12, 0x54, 0x2f, 0xcb, 0x28, 0x64, + 0x88, 0xb3, 0x92, 0x0d, 0x96, 0xd1, 0xa6, 0xe4, + 0x1f, 0xf1, 0x4d, 0xa4, 0xab, 0x1c, 0xee, 0x54, + 0xf2, 0xad, 0x29, 0x6d, 0x32, 0x37, 0xb2, 0x16, + 0x77, 0x5c, 0xdc, 0x2e, 0x54, 0xec, 0x75, 0x26, + 0xc6, 0x36, 0xd9, 0x17, 0x2c, 0xf1, 0x7a, 0xdc, + 0x4b, 0xf1, 0xe2, 0xd9, 0x95, 0xba, 0xac, 0x87, + 0xc1, 0xf3, 0x8e, 0x58, 0x08, 0xd8, 0x87, 0x60, + 0xc9, 0xee, 0x6a, 0xde, 0xa4, 0xd2, 0xfc, 0x0d, + 0xe5, 0x36, 0xc4, 0x5c, 0x52, 0xb3, 0x07, 0x54, + 0x65, 0x24, 0xc1, 0xb1, 0xd1, 0xb1, 0x53, 0x13, + 0x31, 0x79, 0x7f, 0x05, 0x76, 0xeb, 0x37, 0x59, + 0x15, 0x2b, 0xd1, 0x3f, 0xac, 0x08, 0x97, 0xeb, + 0x91, 0x98, 0xdf, 0x6c, 0x09, 0x0d, 0x04, 0x9f, + 0xdc, 0x3b, 0x0e, 0x60, 0x68, 0x47, 0x23, 0x15, + 0x16, 0xc6, 0x0b, 0x35, 0xf8, 0x77, 0xa2, 0x78, + 0x50, 0xd4, 0x64, 0x22, 0x33, 0xff, 0xfb, 0x93, + 0x71, 0x46, 0x50, 0x39, 0x1b, 0x9c, 0xea, 0x4e, + 0x8d, 0x0c, 0x37, 0xe5, 0x5c, 0x51, 0x3a, 0x31, + 0xb2, 0x85, 0x84, 0x3f, 0x41, 0xee, 0xa2, 0xc1, + 0xc6, 0x13, 0x3b, 0x54, 0x28, 0xd2, 0x18, 0x37, + 0xcc, 0x46, 0x9f, 0x6a, 0x91, 0x3d, 0x5a, 0x15, + 0x3c, 0x89, 0xa3, 0x61, 0x06, 0x7d, 0x2e, 0x78, + 0xbe, 0x7d, 0x40, 0xba, 0x2f, 0x95, 0xb1, 0x2f, + 0x87, 0x3b, 0x8a, 0xbe, 0x6a, 0xf4, 0xc2, 0x31, + 0x74, 0xee, 0x91, 0xe0, 0x23, 0xaa, 0x5d, 0x7f, + 0xdd, 0xf0, 0x44, 0x8c, 0x0b, 0x59, 0x2b, 0xfc, + 0x48, 0x3a, 0xdf, 0x07, 0x05, 0x38, 0x6c, 0xc9, + 0xeb, 0x18, 0x24, 0x68, 0x8d, 0x58, 0x98, 0xd3, + 0x31, 0xa3, 0xe4, 0x70, 0x59, 0xb1, 0x21, 0xbe, + 0x7e, 0x65, 0x7d, 0xb8, 0x04, 0xab, 0xf6, 0xe4, + 0xd7, 0xda, 0xec, 0x09, 0x8f, 0xda, 0x6d, 0x24, + 0x07, 0xcc, 0x29, 0x17, 0x05, 0x78, 0x1a, 0xc1, + 0xb1, 0xce, 0xfc, 0xaa, 0x2d, 0xe7, 0xcc, 0x85, + 0x84, 0x84, 0x03, 0x2a, 0x0c, 0x3f, 0xa9, 0xf8, + 0xfd, 0x84, 0x53, 0x59, 0x5c, 0xf0, 0xd4, 0x09, + 0xf0, 0xd2, 0x6c, 0x32, 0x03, 0xb0, 0xa0, 0x8c, + 0x52, 0xeb, 0x23, 0x91, 0x88, 0x43, 0x13, 0x46, + 0xf6, 0x1e, 0xb4, 0x1b, 0xf5, 0x8e, 0x3a, 0xb5, + 0x3d, 0x00, 0xf6, 0xe5, 0x08, 0x3d, 0x5f, 0x39, + 0xd3, 0x21, 0x69, 0xbc, 0x03, 0x22, 0x3a, 0xd2, + 0x5c, 0x84, 0xf8, 0x15, 0xc4, 0x80, 0x0b, 0xbc, + 0x29, 0x3c, 0xf3, 0x95, 0x98, 0xcd, 0x8f, 0x35, + 0xbc, 0xa5, 0x3e, 0xfc, 0xd4, 0x13, 0x9e, 0xde, + 0x4f, 0xce, 0x71, 0x9d, 0x09, 0xad, 0xf2, 0x80, + 0x6b, 0x65, 0x7f, 0x03, 0x00, 0x14, 0x7c, 0x15, + 0x85, 0x40, 0x6d, 0x70, 0xea, 0xdc, 0xb3, 0x63, + 0x35, 0x4f, 0x4d, 0xe0, 0xd9, 0xd5, 0x3c, 0x58, + 0x56, 0x23, 0x80, 0xe2, 0x36, 0xdd, 0x75, 0x1d, + 0x94, 0x11, 0x41, 0x8e, 0xe0, 0x81, 0x8e, 0xcf, + 0xe0, 0xe5, 0xf6, 0xde, 0xd1, 0xe7, 0x04, 0x12, + 0x79, 0x92, 0x2b, 0x71, 0x2a, 0x79, 0x8b, 0x7c, + 0x44, 0x79, 0x16, 0x30, 0x4e, 0xf4, 0xf6, 0x9b, + 0xb7, 0x40, 0xa3, 0x5a, 0xa7, 0x69, 0x3e, 0xc1, + 0x3a, 0x04, 0xd0, 0x88, 0xa0, 0x3b, 0xdd, 0xc6, + 0x9e, 0x7e, 0x1e, 0x1e, 0x8f, 0x44, 0xf7, 0x73, + 0x67, 0x1e, 0x1a, 0x78, 0xfa, 0x62, 0xf4, 0xa9, + 0xa8, 0xc6, 0x5b, 0xb8, 0xfa, 0x06, 0x7d, 0x5e, + 0x38, 0x1c, 0x9a, 0x39, 0xe9, 0x39, 0x98, 0x22, + 0x0b, 0xa7, 0xac, 0x0b, 0xf3, 0xbc, 0xf1, 0xeb, + 0x8c, 0x81, 0xe3, 0x48, 0x8a, 0xed, 0x42, 0xc2, + 0x38, 0xcf, 0x3e, 0xda, 0xd2, 0x89, 0x8d, 0x9c, + 0x53, 0xb5, 0x2f, 0x41, 0x01, 0x26, 0x84, 0x9c, + 0xa3, 0x56, 0xf6, 0x49, 0xc7, 0xd4, 0x9f, 0x93, + 0x1b, 0x96, 0x49, 0x5e, 0xad, 0xb3, 0x84, 0x1f, + 0x3c, 0xa4, 0xe0, 0x9b, 0xd1, 0x90, 0xbc, 0x38, + 0x6c, 0xdd, 0x95, 0x4d, 0x9d, 0xb1, 0x71, 0x57, + 0x2d, 0x34, 0xe8, 0xb8, 0x42, 0xc7, 0x99, 0x03, + 0xc7, 0x07, 0x30, 0x65, 0x91, 0x55, 0xd5, 0x90, + 0x70, 0x97, 0x37, 0x68, 0xd4, 0x11, 0xf9, 0xe8, + 0xce, 0xec, 0xdc, 0x34, 0xd5, 0xd3, 0xb7, 0xc4, + 0xb8, 0x97, 0x05, 0x92, 0xad, 0xf8, 0xe2, 0x36, + 0x64, 0x41, 0xc9, 0xc5, 0x41, 0x77, 0x52, 0xd7, + 0x2c, 0xa5, 0x24, 0x2f, 0xd9, 0x34, 0x0b, 0x47, + 0x35, 0xa7, 0x28, 0x8b, 0xc5, 0xcd, 0xe9, 0x46, + 0xac, 0x39, 0x94, 0x3c, 0x10, 0xc6, 0x29, 0x73, + 0x0e, 0x0e, 0x5d, 0xe0, 0x71, 0x03, 0x8a, 0x72, + 0x0e, 0x26, 0xb0, 0x7d, 0x84, 0xed, 0x95, 0x23, + 0x49, 0x5a, 0x45, 0x83, 0x45, 0x60, 0x11, 0x4a, + 0x46, 0x31, 0xd4, 0xd8, 0x16, 0x54, 0x98, 0x58, + 0xed, 0x6d, 0xcc, 0x5d, 0xd6, 0x50, 0x61, 0x9f, + 0x9d, 0xc5, 0x3e, 0x9d, 0x32, 0x47, 0xde, 0x96, + 0xe1, 0x5d, 0xd8, 0xf8, 0xb4, 0x69, 0x6f, 0xb9, + 0x15, 0x90, 0x57, 0x7a, 0xf6, 0xad, 0xb0, 0x5b, + 0xf5, 0xa6, 0x36, 0x94, 0xfd, 0x84, 0xce, 0x1c, + 0x0f, 0x4b, 0xd0, 0xc2, 0x5b, 0x6b, 0x56, 0xef, + 0x73, 0x93, 0x0b, 0xc3, 0xee, 0xd9, 0xcf, 0xd3, + 0xa4, 0x22, 0x58, 0xcd, 0x50, 0x6e, 0x65, 0xf4, + 0xe9, 0xb7, 0x71, 0xaf, 0x4b, 0xb3, 0xb6, 0x2f, + 0x0f, 0x0e, 0x3b, 0xc9, 0x85, 0x14, 0xf5, 0x17, + 0xe8, 0x7a, 0x3a, 0xbf, 0x5f, 0x5e, 0xf8, 0x18, + 0x48, 0xa6, 0x72, 0xab, 0x06, 0x95, 0xe9, 0xc8, + 0xa7, 0xf4, 0x32, 0x44, 0x04, 0x0c, 0x84, 0x98, + 0x73, 0xe3, 0x89, 0x8d, 0x5f, 0x7e, 0x4a, 0x42, + 0x8f, 0xc5, 0x28, 0xb1, 0x82, 0xef, 0x1c, 0x97, + 0x31, 0x3b, 0x4d, 0xe0, 0x0e, 0x10, 0x10, 0x97, + 0x93, 0x49, 0x78, 0x2f, 0x0d, 0x86, 0x8b, 0xa1, + 0x53, 0xa9, 0x81, 0x20, 0x79, 0xe7, 0x07, 0x77, + 0xb6, 0xac, 0x5e, 0xd2, 0x05, 0xcd, 0xe9, 0xdb, + 0x8a, 0x94, 0x82, 0x8a, 0x23, 0xb9, 0x3d, 0x1c, + 0xa9, 0x7d, 0x72, 0x4a, 0xed, 0x33, 0xa3, 0xdb, + 0x21, 0xa7, 0x86, 0x33, 0x45, 0xa5, 0xaa, 0x56, + 0x45, 0xb5, 0x83, 0x29, 0x40, 0x47, 0x79, 0x04, + 0x6e, 0xb9, 0x95, 0xd0, 0x81, 0x77, 0x2d, 0x48, + 0x1e, 0xfe, 0xc3, 0xc2, 0x1e, 0xe5, 0xf2, 0xbe, + 0xfd, 0x3b, 0x94, 0x9f, 0xc4, 0xc4, 0x26, 0x9d, + 0xe4, 0x66, 0x1e, 0x19, 0xee, 0x6c, 0x79, 0x97, + 0x11, 0x31, 0x4b, 0x0d, 0x01, 0xcb, 0xde, 0xa8, + 0xf6, 0x6d, 0x7c, 0x39, 0x46, 0x4e, 0x7e, 0x3f, + 0x94, 0x17, 0xdf, 0xa1, 0x7d, 0xd9, 0x1c, 0x8e, + 0xbc, 0x7d, 0x33, 0x7d, 0xe3, 0x12, 0x40, 0xca, + 0xab, 0x37, 0x11, 0x46, 0xd4, 0xae, 0xef, 0x44, + 0xa2, 0xb3, 0x6a, 0x66, 0x0e, 0x0c, 0x90, 0x7f, + 0xdf, 0x5c, 0x66, 0x5f, 0xf2, 0x94, 0x9f, 0xa6, + 0x73, 0x4f, 0xeb, 0x0d, 0xad, 0xbf, 0xc0, 0x63, + 0x5c, 0xdc, 0x46, 0x51, 0xe8, 0x8e, 0x90, 0x19, + 0xa8, 0xa4, 0x3c, 0x91, 0x79, 0xfa, 0x7e, 0x58, + 0x85, 0x13, 0x55, 0xc5, 0x19, 0x82, 0x37, 0x1b, + 0x0a, 0x02, 0x1f, 0x99, 0x6b, 0x18, 0xf1, 0x28, + 0x08, 0xa2, 0x73, 0xb8, 0x0f, 0x2e, 0xcd, 0xbf, + 0xf3, 0x86, 0x7f, 0xea, 0xef, 0xd0, 0xbb, 0xa6, + 0x21, 0xdf, 0x49, 0x73, 0x51, 0xcc, 0x36, 0xd3, + 0x3e, 0xa0, 0xf8, 0x44, 0xdf, 0xd3, 0xa6, 0xbe, + 0x8a, 0xd4, 0x57, 0xdd, 0x72, 0x94, 0x61, 0x0f, + 0x82, 0xd1, 0x07, 0xb8, 0x7c, 0x18, 0x83, 0xdf, + 0x3a, 0xe5, 0x50, 0x6a, 0x82, 0x20, 0xac, 0xa9, + 0xa8, 0xff, 0xd9, 0xf3, 0x77, 0x33, 0x5a, 0x9e, + 0x7f, 0x6d, 0xfe, 0x5d, 0x33, 0x41, 0x42, 0xe7, + 0x6c, 0x19, 0xe0, 0x44, 0x8a, 0x15, 0xf6, 0x70, + 0x98, 0xb7, 0x68, 0x4d, 0xfa, 0x97, 0x39, 0xb0, + 0x8e, 0xe8, 0x84, 0x8b, 0x75, 0x30, 0xb7, 0x7d, + 0x92, 0x69, 0x20, 0x9c, 0x81, 0xfb, 0x4b, 0xf4, + 0x01, 0x50, 0xeb, 0xce, 0x0c, 0x1c, 0x6c, 0xb5, + 0x4a, 0xd7, 0x27, 0x0c, 0xce, 0xbb, 0xe5, 0x85, + 0xf0, 0xb6, 0xee, 0xd5, 0x70, 0xdd, 0x3b, 0xfc, + 0xd4, 0x99, 0xf1, 0x33, 0xdd, 0x8b, 0xc4, 0x2f, + 0xae, 0xab, 0x74, 0x96, 0x32, 0xc7, 0x4c, 0x56, + 0x3c, 0x89, 0x0f, 0x96, 0x0b, 0x42, 0xc0, 0xcb, + 0xee, 0x0f, 0x0b, 0x8c, 0xfb, 0x7e, 0x47, 0x7b, + 0x64, 0x48, 0xfd, 0xb2, 0x00, 0x80, 0x89, 0xa5, + 0x13, 0x55, 0x62, 0xfc, 0x8f, 0xe2, 0x42, 0x03, + 0xb7, 0x4e, 0x2a, 0x79, 0xb4, 0x82, 0xea, 0x23, + 0x49, 0xda, 0xaf, 0x52, 0x63, 0x1e, 0x60, 0x03, + 0x89, 0x06, 0x44, 0x46, 0x08, 0xc3, 0xc4, 0x87, + 0x70, 0x2e, 0xda, 0x94, 0xad, 0x6b, 0xe0, 0xe4, + 0xd1, 0x8a, 0x06, 0xc2, 0xa8, 0xc0, 0xa7, 0x43, + 0x3c, 0x47, 0x52, 0x0e, 0xc3, 0x77, 0x81, 0x11, + 0x67, 0x0e, 0xa0, 0x70, 0x04, 0x47, 0x29, 0x40, + 0x86, 0x0d, 0x34, 0x56, 0xa7, 0xc9, 0x35, 0x59, + 0x68, 0xdc, 0x93, 0x81, 0x70, 0xee, 0x86, 0xd9, + 0x80, 0x06, 0x40, 0x4f, 0x1a, 0x0d, 0x40, 0x30, + 0x0b, 0xcb, 0x96, 0x47, 0xc1, 0xb7, 0x52, 0xfd, + 0x56, 0xe0, 0x72, 0x4b, 0xfb, 0xbd, 0x92, 0x45, + 0x61, 0x71, 0xc2, 0x33, 0x11, 0xbf, 0x52, 0x83, + 0x79, 0x26, 0xe0, 0x49, 0x6b, 0xb7, 0x05, 0x8b, + 0xe8, 0x0e, 0x87, 0x31, 0xd7, 0x9d, 0x8a, 0xf5, + 0xc0, 0x5f, 0x2e, 0x58, 0x4a, 0xdb, 0x11, 0xb3, + 0x6c, 0x30, 0x2a, 0x46, 0x19, 0xe3, 0x27, 0x84, + 0x1f, 0x63, 0x6e, 0xf6, 0x57, 0xc7, 0xc9, 0xd8, + 0x5e, 0xba, 0xb3, 0x87, 0xd5, 0x83, 0x26, 0x34, + 0x21, 0x9e, 0x65, 0xde, 0x42, 0xd3, 0xbe, 0x7b, + 0xbc, 0x91, 0x71, 0x44, 0x4d, 0x99, 0x3b, 0x31, + 0xe5, 0x3f, 0x11, 0x4e, 0x7f, 0x13, 0x51, 0x3b, + 0xae, 0x79, 0xc9, 0xd3, 0x81, 0x8e, 0x25, 0x40, + 0x10, 0xfc, 0x07, 0x1e, 0xf9, 0x7b, 0x9a, 0x4b, + 0x6c, 0xe3, 0xb3, 0xad, 0x1a, 0x0a, 0xdd, 0x9e, + 0x59, 0x0c, 0xa2, 0xcd, 0xae, 0x48, 0x4a, 0x38, + 0x5b, 0x47, 0x41, 0x94, 0x65, 0x6b, 0xbb, 0xeb, + 0x5b, 0xe3, 0xaf, 0x07, 0x5b, 0xd4, 0x4a, 0xa2, + 0xc9, 0x5d, 0x2f, 0x64, 0x03, 0xd7, 0x3a, 0x2c, + 0x6e, 0xce, 0x76, 0x95, 0xb4, 0xb3, 0xc0, 0xf1, + 0xe2, 0x45, 0x73, 0x7a, 0x5c, 0xab, 0xc1, 0xfc, + 0x02, 0x8d, 0x81, 0x29, 0xb3, 0xac, 0x07, 0xec, + 0x40, 0x7d, 0x45, 0xd9, 0x7a, 0x59, 0xee, 0x34, + 0xf0, 0xe9, 0xd5, 0x7b, 0x96, 0xb1, 0x3d, 0x95, + 0xcc, 0x86, 0xb5, 0xb6, 0x04, 0x2d, 0xb5, 0x92, + 0x7e, 0x76, 0xf4, 0x06, 0xa9, 0xa3, 0x12, 0x0f, + 0xb1, 0xaf, 0x26, 0xba, 0x7c, 0xfc, 0x7e, 0x1c, + 0xbc, 0x2c, 0x49, 0x97, 0x53, 0x60, 0x13, 0x0b, + 0xa6, 0x61, 0x83, 0x89, 0x42, 0xd4, 0x17, 0x0c, + 0x6c, 0x26, 0x52, 0xc3, 0xb3, 0xd4, 0x67, 0xf5, + 0xe3, 0x04, 0xb7, 0xf4, 0xcb, 0x80, 0xb8, 0xcb, + 0x77, 0x56, 0x3e, 0xaa, 0x57, 0x54, 0xee, 0xb4, + 0x2c, 0x67, 0xcf, 0xf2, 0xdc, 0xbe, 0x55, 0xf9, + 0x43, 0x1f, 0x6e, 0x22, 0x97, 0x67, 0x7f, 0xc4, + 0xef, 0xb1, 0x26, 0x31, 0x1e, 0x27, 0xdf, 0x41, + 0x80, 0x47, 0x6c, 0xe2, 0xfa, 0xa9, 0x8c, 0x2a, + 0xf6, 0xf2, 0xab, 0xf0, 0x15, 0xda, 0x6c, 0xc8, + 0xfe, 0xb5, 0x23, 0xde, 0xa9, 0x05, 0x3f, 0x06, + 0x54, 0x4c, 0xcd, 0xe1, 0xab, 0xfc, 0x0e, 0x62, + 0x33, 0x31, 0x73, 0x2c, 0x76, 0xcb, 0xb4, 0x47, + 0x1e, 0x20, 0xad, 0xd8, 0xf2, 0x31, 0xdd, 0xc4, + 0x8b, 0x0c, 0x77, 0xbe, 0xe1, 0x8b, 0x26, 0x00, + 0x02, 0x58, 0xd6, 0x8d, 0xef, 0xad, 0x74, 0x67, + 0xab, 0x3f, 0xef, 0xcb, 0x6f, 0xb0, 0xcc, 0x81, + 0x44, 0x4c, 0xaf, 0xe9, 0x49, 0x4f, 0xdb, 0xa0, + 0x25, 0xa4, 0xf0, 0x89, 0xf1, 0xbe, 0xd8, 0x10, + 0xff, 0xb1, 0x3b, 0x4b, 0xfa, 0x98, 0xf5, 0x79, + 0x6d, 0x1e, 0x69, 0x4d, 0x57, 0xb1, 0xc8, 0x19, + 0x1b, 0xbd, 0x1e, 0x8c, 0x84, 0xb7, 0x7b, 0xe8, + 0xd2, 0x2d, 0x09, 0x41, 0x41, 0x37, 0x3d, 0xb1, + 0x6f, 0x26, 0x5d, 0x71, 0x16, 0x3d, 0xb7, 0x83, + 0x27, 0x2c, 0xa7, 0xb6, 0x50, 0xbd, 0x91, 0x86, + 0xab, 0x24, 0xa1, 0x38, 0xfd, 0xea, 0x71, 0x55, + 0x7e, 0x9a, 0x07, 0x77, 0x4b, 0xfa, 0x61, 0x66, + 0x20, 0x1e, 0x28, 0x95, 0x18, 0x1b, 0xa4, 0xa0, + 0xfd, 0xc0, 0x89, 0x72, 0x43, 0xd9, 0x3b, 0x49, + 0x5a, 0x3f, 0x9d, 0xbf, 0xdb, 0xb4, 0x46, 0xea, + 0x42, 0x01, 0x77, 0x23, 0x68, 0x95, 0xb6, 0x24, + 0xb3, 0xa8, 0x6c, 0x28, 0x3b, 0x11, 0x40, 0x7e, + 0x18, 0x65, 0x6d, 0xd8, 0x24, 0x42, 0x7d, 0x88, + 0xc0, 0x52, 0xd9, 0x05, 0xe4, 0x95, 0x90, 0x87, + 0x8c, 0xf4, 0xd0, 0x6b, 0xb9, 0x83, 0x99, 0x34, + 0x6d, 0xfe, 0x54, 0x40, 0x94, 0x52, 0x21, 0x4f, + 0x14, 0x25, 0xc5, 0xd6, 0x5e, 0x95, 0xdc, 0x0a, + 0x2b, 0x89, 0x20, 0x11, 0x84, 0x48, 0xd6, 0x3a, + 0xcd, 0x5c, 0x24, 0xad, 0x62, 0xe3, 0xb1, 0x93, + 0x25, 0x8d, 0xcd, 0x7e, 0xfc, 0x27, 0xa3, 0x37, + 0xfd, 0x84, 0xfc, 0x1b, 0xb2, 0xf1, 0x27, 0x38, + 0x5a, 0xb7, 0xfc, 0xf2, 0xfa, 0x95, 0x66, 0xd4, + 0xfb, 0xba, 0xa7, 0xd7, 0xa3, 0x72, 0x69, 0x48, + 0x48, 0x8c, 0xeb, 0x28, 0x89, 0xfe, 0x33, 0x65, + 0x5a, 0x36, 0x01, 0x7e, 0x06, 0x79, 0x0a, 0x09, + 0x3b, 0x74, 0x11, 0x9a, 0x6e, 0xbf, 0xd4, 0x9e, + 0x58, 0x90, 0x49, 0x4f, 0x4d, 0x08, 0xd4, 0xe5, + 0x4a, 0x09, 0x21, 0xef, 0x8b, 0xb8, 0x74, 0x3b, + 0x91, 0xdd, 0x36, 0x85, 0x60, 0x2d, 0xfa, 0xd4, + 0x45, 0x7b, 0x45, 0x53, 0xf5, 0x47, 0x87, 0x7e, + 0xa6, 0x37, 0xc8, 0x78, 0x7a, 0x68, 0x9d, 0x8d, + 0x65, 0x2c, 0x0e, 0x91, 0x5c, 0xa2, 0x60, 0xf0, + 0x8e, 0x3f, 0xe9, 0x1a, 0xcd, 0xaa, 0xe7, 0xd5, + 0x77, 0x18, 0xaf, 0xc9, 0xbc, 0x18, 0xea, 0x48, + 0x1b, 0xfb, 0x22, 0x48, 0x70, 0x16, 0x29, 0x9e, + 0x5b, 0xc1, 0x2c, 0x66, 0x23, 0xbc, 0xf0, 0x1f, + 0xef, 0xaf, 0xe4, 0xd6, 0x04, 0x19, 0x82, 0x7a, + 0x0b, 0xba, 0x4b, 0x46, 0xb1, 0x6a, 0x85, 0x5d, + 0xb4, 0x73, 0xd6, 0x21, 0xa1, 0x71, 0x60, 0x14, + 0xee, 0x0a, 0x77, 0xc4, 0x66, 0x2e, 0xf9, 0x69, + 0x30, 0xaf, 0x41, 0x0b, 0xc8, 0x83, 0x3c, 0x53, + 0x99, 0x19, 0x27, 0x46, 0xf7, 0x41, 0x6e, 0x56, + 0xdc, 0x94, 0x28, 0x67, 0x4e, 0xb7, 0x25, 0x48, + 0x8a, 0xc2, 0xe0, 0x60, 0x96, 0xcc, 0x18, 0xf4, + 0x84, 0xdd, 0xa7, 0x5e, 0x3e, 0x05, 0x0b, 0x26, + 0x26, 0xb2, 0x5c, 0x1f, 0x57, 0x1a, 0x04, 0x7e, + 0x6a, 0xe3, 0x2f, 0xb4, 0x35, 0xb6, 0x38, 0x40, + 0x40, 0xcd, 0x6f, 0x87, 0x2e, 0xef, 0xa3, 0xd7, + 0xa9, 0xc2, 0xe8, 0x0d, 0x27, 0xdf, 0x44, 0x62, + 0x99, 0xa0, 0xfc, 0xcf, 0x81, 0x78, 0xcb, 0xfe, + 0xe5, 0xa0, 0x03, 0x4e, 0x6c, 0xd7, 0xf4, 0xaf, + 0x7a, 0xbb, 0x61, 0x82, 0xfe, 0x71, 0x89, 0xb2, + 0x22, 0x7c, 0x8e, 0x83, 0x04, 0xce, 0xf6, 0x5d, + 0x84, 0x8f, 0x95, 0x6a, 0x7f, 0xad, 0xfd, 0x32, + 0x9c, 0x5e, 0xe4, 0x9c, 0x89, 0x60, 0x54, 0xaa, + 0x96, 0x72, 0xd2, 0xd7, 0x36, 0x85, 0xa9, 0x45, + 0xd2, 0x2a, 0xa1, 0x81, 0x49, 0x6f, 0x7e, 0x04, + 0xfa, 0xe2, 0xfe, 0x90, 0x26, 0x77, 0x5a, 0x33, + 0xb8, 0x04, 0x9a, 0x7a, 0xe6, 0x4c, 0x4f, 0xad, + 0x72, 0x96, 0x08, 0x28, 0x58, 0x13, 0xf8, 0xc4, + 0x1c, 0xf0, 0xc3, 0x45, 0x95, 0x49, 0x20, 0x8c, + 0x9f, 0x39, 0x70, 0xe1, 0x77, 0xfe, 0xd5, 0x4b, + 0xaf, 0x86, 0xda, 0xef, 0x22, 0x06, 0x83, 0x36, + 0x29, 0x12, 0x11, 0x40, 0xbc, 0x3b, 0x86, 0xaa, + 0xaa, 0x65, 0x60, 0xc3, 0x80, 0xca, 0xed, 0xa9, + 0xf3, 0xb0, 0x79, 0x96, 0xa2, 0x55, 0x27, 0x28, + 0x55, 0x73, 0x26, 0xa5, 0x50, 0xea, 0x92, 0x4b, + 0x3c, 0x5c, 0x82, 0x33, 0xf0, 0x01, 0x3f, 0x03, + 0xc1, 0x08, 0x05, 0xbf, 0x98, 0xf4, 0x9b, 0x6d, + 0xa5, 0xa8, 0xb4, 0x82, 0x0c, 0x06, 0xfa, 0xff, + 0x2d, 0x08, 0xf3, 0x05, 0x4f, 0x57, 0x2a, 0x39, + 0xd4, 0x83, 0x0d, 0x75, 0x51, 0xd8, 0x5b, 0x1b, + 0xd3, 0x51, 0x5a, 0x32, 0x2a, 0x9b, 0x32, 0xb2, + 0xf2, 0xa4, 0x96, 0x12, 0xf2, 0xae, 0x40, 0x34, + 0x67, 0xa8, 0xf5, 0x44, 0xd5, 0x35, 0x53, 0xfe, + 0xa3, 0x60, 0x96, 0x63, 0x0f, 0x1f, 0x6e, 0xb0, + 0x5a, 0x42, 0xa6, 0xfc, 0x51, 0x0b, 0x60, 0x27, + 0xbc, 0x06, 0x71, 0xed, 0x65, 0x5b, 0x23, 0x86, + 0x4a, 0x07, 0x3b, 0x22, 0x07, 0x46, 0xe6, 0x90, + 0x3e, 0xf3, 0x25, 0x50, 0x1b, 0x4c, 0x7f, 0x03, + 0x08, 0xa8, 0x36, 0x6b, 0x87, 0xe5, 0xe3, 0xdb, + 0x9a, 0x38, 0x83, 0xff, 0x9f, 0x1a, 0x9f, 0x57, + 0xa4, 0x2a, 0xf6, 0x37, 0xbc, 0x1a, 0xff, 0xc9, + 0x1e, 0x35, 0x0c, 0xc3, 0x7c, 0xa3, 0xb2, 0xe5, + 0xd2, 0xc6, 0xb4, 0x57, 0x47, 0xe4, 0x32, 0x16, + 0x6d, 0xa9, 0xae, 0x64, 0xe6, 0x2d, 0x8d, 0xc5, + 0x8d, 0x50, 0x8e, 0xe8, 0x1a, 0x22, 0x34, 0x2a, + 0xd9, 0xeb, 0x51, 0x90, 0x4a, 0xb1, 0x41, 0x7d, + 0x64, 0xf9, 0xb9, 0x0d, 0xf6, 0x23, 0x33, 0xb0, + 0x33, 0xf4, 0xf7, 0x3f, 0x27, 0x84, 0xc6, 0x0f, + 0x54, 0xa5, 0xc0, 0x2e, 0xec, 0x0b, 0x3a, 0x48, + 0x6e, 0x80, 0x35, 0x81, 0x43, 0x9b, 0x90, 0xb1, + 0xd0, 0x2b, 0xea, 0x21, 0xdc, 0xda, 0x5b, 0x09, + 0xf4, 0xcc, 0x10, 0xb4, 0xc7, 0xfe, 0x79, 0x51, + 0xc3, 0xc5, 0xac, 0x88, 0x74, 0x84, 0x0b, 0x4b, + 0xca, 0x79, 0x16, 0x29, 0xfb, 0x69, 0x54, 0xdf, + 0x41, 0x7e, 0xe9, 0xc7, 0x8e, 0xea, 0xa5, 0xfe, + 0xfc, 0x76, 0x0e, 0x90, 0xc4, 0x92, 0x38, 0xad, + 0x7b, 0x48, 0xe6, 0x6e, 0xf7, 0x21, 0xfd, 0x4e, + 0x93, 0x0a, 0x7b, 0x41, 0x83, 0x68, 0xfb, 0x57, + 0x51, 0x76, 0x34, 0xa9, 0x6c, 0x00, 0xaa, 0x4f, + 0x66, 0x65, 0x98, 0x4a, 0x4f, 0xa3, 0xa0, 0xef, + 0x69, 0x3f, 0xe3, 0x1c, 0x92, 0x8c, 0xfd, 0xd8, + 0xe8, 0xde, 0x7c, 0x7f, 0x3e, 0x84, 0x8e, 0x69, + 0x3c, 0xf1, 0xf2, 0x05, 0x46, 0xdc, 0x2f, 0x9d, + 0x5e, 0x6e, 0x4c, 0xfb, 0xb5, 0x99, 0x2a, 0x59, + 0x63, 0xc1, 0x34, 0xbc, 0x57, 0xc0, 0x0d, 0xb9, + 0x61, 0x25, 0xf3, 0x33, 0x23, 0x51, 0xb6, 0x0d, + 0x07, 0xa6, 0xab, 0x94, 0x4a, 0xb7, 0x2a, 0xea, + 0xee, 0xac, 0xa3, 0xc3, 0x04, 0x8b, 0x0e, 0x56, + 0xfe, 0x44, 0xa7, 0x39, 0xe2, 0xed, 0xed, 0xb4, + 0x22, 0x2b, 0xac, 0x12, 0x32, 0x28, 0x91, 0xd8, + 0xa5, 0xab, 0xff, 0x5f, 0xe0, 0x4b, 0xda, 0x78, + 0x17, 0xda, 0xf1, 0x01, 0x5b, 0xcd, 0xe2, 0x5f, + 0x50, 0x45, 0x73, 0x2b, 0xe4, 0x76, 0x77, 0xf4, + 0x64, 0x1d, 0x43, 0xfb, 0x84, 0x7a, 0xea, 0x91, + 0xae, 0xf9, 0x9e, 0xb7, 0xb4, 0xb0, 0x91, 0x5f, + 0x16, 0x35, 0x9a, 0x11, 0xb8, 0xc7, 0xc1, 0x8c, + 0xc6, 0x10, 0x8d, 0x2f, 0x63, 0x4a, 0xa7, 0x57, + 0x3a, 0x51, 0xd6, 0x32, 0x2d, 0x64, 0x72, 0xd4, + 0x66, 0xdc, 0x10, 0xa6, 0x67, 0xd6, 0x04, 0x23, + 0x9d, 0x0a, 0x11, 0x77, 0xdd, 0x37, 0x94, 0x17, + 0x3c, 0xbf, 0x8b, 0x65, 0xb0, 0x2e, 0x5e, 0x66, + 0x47, 0x64, 0xac, 0xdd, 0xf0, 0x84, 0xfd, 0x39, + 0xfa, 0x15, 0x5d, 0xef, 0xae, 0xca, 0xc1, 0x36, + 0xa7, 0x5c, 0xbf, 0xc7, 0x08, 0xc2, 0x66, 0x00, + 0x74, 0x74, 0x4e, 0x27, 0x3f, 0x55, 0x8a, 0xb7, + 0x38, 0x66, 0x83, 0x6d, 0xcf, 0x99, 0x9e, 0x60, + 0x8f, 0xdd, 0x2e, 0x62, 0x22, 0x0e, 0xef, 0x0c, + 0x98, 0xa7, 0x85, 0x74, 0x3b, 0x9d, 0xec, 0x9e, + 0xa9, 0x19, 0x72, 0xa5, 0x7f, 0x2c, 0x39, 0xb7, + 0x7d, 0xb7, 0xf1, 0x12, 0x65, 0x27, 0x4b, 0x5a, + 0xde, 0x17, 0xfe, 0xad, 0x44, 0xf3, 0x20, 0x4d, + 0xfd, 0xe4, 0x1f, 0xb5, 0x81, 0xb0, 0x36, 0x37, + 0x08, 0x6f, 0xc3, 0x0c, 0xe9, 0x85, 0x98, 0x82, + 0xa9, 0x62, 0x0c, 0xc4, 0x97, 0xc0, 0x50, 0xc8, + 0xa7, 0x3c, 0x50, 0x9f, 0x43, 0xb9, 0xcd, 0x5e, + 0x4d, 0xfa, 0x1c, 0x4b, 0x0b, 0xa9, 0x98, 0x85, + 0x38, 0x92, 0xac, 0x8d, 0xe4, 0xad, 0x9b, 0x98, + 0xab, 0xd9, 0x38, 0xac, 0x62, 0x52, 0xa3, 0x22, + 0x63, 0x0f, 0xbf, 0x95, 0x48, 0xdf, 0x69, 0xe7, + 0x8b, 0x33, 0xd5, 0xb2, 0xbd, 0x05, 0x49, 0x49, + 0x9d, 0x57, 0x73, 0x19, 0x33, 0xae, 0xfa, 0x33, + 0xf1, 0x19, 0xa8, 0x80, 0xce, 0x04, 0x9f, 0xbc, + 0x1d, 0x65, 0x82, 0x1b, 0xe5, 0x3a, 0x51, 0xc8, + 0x1c, 0x21, 0xe3, 0x5d, 0xf3, 0x7d, 0x9b, 0x2f, + 0x2c, 0x1d, 0x4a, 0x7f, 0x9b, 0x68, 0x35, 0xa3, + 0xb2, 0x50, 0xf7, 0x62, 0x79, 0xcd, 0xf4, 0x98, + 0x4f, 0xe5, 0x63, 0x7c, 0x3e, 0x45, 0x31, 0x8c, + 0x16, 0xa0, 0x12, 0xc8, 0x58, 0xce, 0x39, 0xa6, + 0xbc, 0x54, 0xdb, 0xc5, 0xe0, 0xd5, 0xba, 0xbc, + 0xb9, 0x04, 0xf4, 0x8d, 0xe8, 0x2f, 0x15, 0x9d, +}; + +/* 100 test cases */ +static struct dahash_test { + uint16_t start; /* random 12 bit offset in buf */ + uint16_t length; /* random 8 bit length of test */ + xfs_dahash_t dahash; /* expected dahash result */ +} test[] __initdata = +{ + {0x0567, 0x0097, 0x96951389}, + {0x0869, 0x0055, 0x6455ab4f}, + {0x0c51, 0x00be, 0x8663afde}, + {0x044a, 0x00fc, 0x98fbe432}, + {0x0f29, 0x0079, 0x42371997}, + {0x08ba, 0x0052, 0x942be4f7}, + {0x01f2, 0x0013, 0x5262687e}, + {0x09e3, 0x00e2, 0x8ffb0908}, + {0x007c, 0x0051, 0xb3158491}, + {0x0854, 0x001f, 0x83bb20d9}, + {0x031b, 0x0008, 0x98970bdf}, + {0x0de7, 0x0027, 0xbfbf6f6c}, + {0x0f76, 0x0005, 0x906a7105}, + {0x092e, 0x00d0, 0x86631850}, + {0x0233, 0x0082, 0xdbdd914e}, + {0x04c9, 0x0075, 0x5a400a9e}, + {0x0b66, 0x0099, 0xae128b45}, + {0x000d, 0x00ed, 0xe61c216a}, + {0x0a31, 0x003d, 0xf69663b9}, + {0x00a3, 0x0052, 0x643c39ae}, + {0x0125, 0x00d5, 0x7c310b0d}, + {0x0105, 0x004a, 0x06a77e74}, + {0x0858, 0x008e, 0x265bc739}, + {0x045e, 0x0095, 0x13d6b192}, + {0x0dab, 0x003c, 0xc4498704}, + {0x00cd, 0x00b5, 0x802a4e2d}, + {0x069b, 0x008c, 0x5df60f71}, + {0x0454, 0x006c, 0x5f03d8bb}, + {0x040e, 0x0032, 0x0ce513b5}, + {0x0874, 0x00e2, 0x6a811fb3}, + {0x0521, 0x00b4, 0x93296833}, + {0x0ddc, 0x00cf, 0xf9305338}, + {0x0a70, 0x0023, 0x239549ea}, + {0x083e, 0x0027, 0x2d88ba97}, + {0x0241, 0x00a7, 0xfe0b32e1}, + {0x0dfc, 0x0096, 0x1a11e815}, + {0x023e, 0x001e, 0xebc9a1f3}, + {0x067e, 0x0066, 0xb1067f81}, + {0x09ea, 0x000e, 0x46fd7247}, + {0x036b, 0x008c, 0x1a39acdf}, + {0x078f, 0x0030, 0x964042ab}, + {0x085c, 0x008f, 0x1829edab}, + {0x02ec, 0x009f, 0x6aefa72d}, + {0x043b, 0x00ce, 0x65642ff5}, + {0x0a32, 0x00b8, 0xbd82759e}, + {0x0d3c, 0x0087, 0xf4d66d54}, + {0x09ec, 0x008a, 0x06bfa1ff}, + {0x0902, 0x0015, 0x755025d2}, + {0x08fe, 0x000e, 0xf690ce2d}, + {0x00fb, 0x00dc, 0xe55f1528}, + {0x0eaa, 0x003a, 0x0fe0a8d7}, + {0x05fb, 0x0006, 0x86281cfb}, + {0x0dd1, 0x00a7, 0x60ab51b4}, + {0x0005, 0x001b, 0xf51d969b}, + {0x077c, 0x00dd, 0xc2fed268}, + {0x0575, 0x00f5, 0x432c0b1a}, + {0x05be, 0x0088, 0x78baa04b}, + {0x0c89, 0x0068, 0xeda9e428}, + {0x0f5c, 0x0068, 0xec143c76}, + {0x06a8, 0x0009, 0xd72651ce}, + {0x060f, 0x008e, 0x765426cd}, + {0x07b1, 0x0047, 0x2cfcfa0c}, + {0x04f1, 0x0041, 0x55b172f9}, + {0x0e05, 0x00ac, 0x61efde93}, + {0x0bf7, 0x0097, 0x05b83eee}, + {0x04e9, 0x00f3, 0x9928223a}, + {0x023a, 0x0005, 0xdfada9bc}, + {0x0acb, 0x000e, 0x2217cecd}, + {0x0148, 0x0060, 0xbc3f7405}, + {0x0764, 0x0059, 0xcbc201b1}, + {0x021f, 0x0059, 0x5d6b2256}, + {0x0f1e, 0x006c, 0xdefeeb45}, + {0x071c, 0x00b9, 0xb9b59309}, + {0x0564, 0x0063, 0xae064271}, + {0x0b14, 0x0044, 0xdb867d9b}, + {0x0e5a, 0x0055, 0xff06b685}, + {0x015e, 0x00ba, 0x1115ccbc}, + {0x0379, 0x00e6, 0x5f4e58dd}, + {0x013b, 0x0067, 0x4897427e}, + {0x0e64, 0x0071, 0x7af2b7a4}, + {0x0a11, 0x0050, 0x92105726}, + {0x0109, 0x0055, 0xd0d000f9}, + {0x00aa, 0x0022, 0x815d229d}, + {0x09ac, 0x004f, 0x02f9d985}, + {0x0e1b, 0x00ce, 0x5cf92ab4}, + {0x08af, 0x00d8, 0x17ca72d1}, + {0x0e33, 0x000a, 0xda2dba6b}, + {0x0ee3, 0x006a, 0xb00048e5}, + {0x0648, 0x001a, 0x2364b8cb}, + {0x0315, 0x0085, 0x0596fd0d}, + {0x0fbb, 0x003e, 0x298230ca}, + {0x0422, 0x006a, 0x78ada4ab}, + {0x04ba, 0x0073, 0xced1fbc2}, + {0x007d, 0x0061, 0x4b7ff236}, + {0x070b, 0x00d0, 0x261cf0ae}, + {0x0c1a, 0x0035, 0x8be92ee2}, + {0x0af8, 0x0063, 0x824dcf03}, + {0x08f8, 0x006d, 0xd289710c}, + {0x021b, 0x00ee, 0x6ac1c41d}, + {0x05b5, 0x00da, 0x8e52f0e2}, +}; + +int __init +xfs_dahash_test(void) +{ + unsigned int i; + unsigned int errors = 0; + + for (i = 0; i < ARRAY_SIZE(test); i++) { + xfs_dahash_t hash; + + hash = xfs_da_hashname(test_buf + test[i].start, + test[i].length); + if (hash != test[i].dahash) + errors++; + } + + if (errors) { + printk(KERN_ERR "xfs dir/attr hash test failed %u times!", + errors); + return -ERANGE; + } + + return 0; +} diff --git a/fs/xfs/xfs_dahash_test.h b/fs/xfs/xfs_dahash_test.h new file mode 100644 index 000000000000..1a05bf4bd9e1 --- /dev/null +++ b/fs/xfs/xfs_dahash_test.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_DAHASH_TEST_H__ +#define __XFS_DAHASH_TEST_H__ + +int xfs_dahash_test(void); + +#endif /* __XFS_DAHASH_TEST_H__ */ + diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2479b5cbd75e..4f814f9e12ab 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -41,6 +41,7 @@ #include "xfs_attr_item.h" #include "xfs_xattr.h" #include "xfs_iunlink_item.h" +#include "xfs_dahash_test.h" #include #include @@ -2286,6 +2287,10 @@ init_xfs_fs(void) xfs_check_ondisk_structs(); + error = xfs_dahash_test(); + if (error) + return error; + printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); -- cgit From 1470afefc3c42df5d1662f87d079b46651bdc95b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:02 -0700 Subject: cpumask: introduce for_each_cpu_or Equivalent of for_each_cpu_and, except it ORs the two masks together so it iterates all the CPUs present in either mask. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/cpumask.h | 17 +++++++++++++++++ include/linux/find.h | 37 +++++++++++++++++++++++++++++++++++++ lib/find_bit.c | 9 +++++++++ 3 files changed, 63 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fbe76607965..220974ef1bf5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -350,6 +350,23 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta #define for_each_cpu_andnot(cpu, mask1, mask2) \ for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) +/** + * for_each_cpu_or - iterate over every cpu present in either mask + * @cpu: the (optionally unsigned) integer iterator + * @mask1: the first cpumask pointer + * @mask2: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_or(&tmp, &mask1, &mask2); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_or(cpu, mask1, mask2) \ + for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) + /** * cpumask_any_but - return a "random" in a cpumask, but not this one. * @mask: the cpumask to search diff --git a/include/linux/find.h b/include/linux/find.h index 4647864a5ffd..5e4f39ef2e72 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -14,6 +14,8 @@ unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long unsigned long nbits, unsigned long start); unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start); +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); @@ -127,6 +129,36 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, } #endif +#ifndef find_next_or_bit +/** + * find_next_or_bit - find the next set bit in either memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_or_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = (*addr1 | *addr2) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_or_bit(addr1, addr2, size, offset); +} +#endif + #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -536,6 +568,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ (bit)++) +#define for_each_or_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) + /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) diff --git a/lib/find_bit.c b/lib/find_bit.c index c10920e66788..32f99e9a670e 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -182,6 +182,15 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l EXPORT_SYMBOL(_find_next_andnot_bit); #endif +#ifndef find_next_or_bit +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_or_bit); +#endif + #ifndef find_next_zero_bit unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) -- cgit From 8b57b11cca88f397035a95b9e12b03511847b0e8 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:02 -0700 Subject: pcpcntrs: fix dying cpu summation race In commit f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") a race condition between a cpu dying and percpu_counter_sum() iterating online CPUs was identified. The solution was to iterate all possible CPUs for summation via percpu_counter_sum_all(). We recently had a percpu_counter_sum() call in XFS trip over this same race condition and it fired a debug assert because the filesystem was unmounting and the counter *should* be zero just before we destroy it. That was reported here: https://lore.kernel.org/linux-kernel/20230314090649.326642-1-yebin@huaweicloud.com/ likely as a result of running generic/648 which exercises filesystems in the presence of CPU online/offline events. The solution to use percpu_counter_sum_all() is an awful one. We use percpu counters and percpu_counter_sum() for accurate and reliable threshold detection for space management, so a summation race condition during these operations can result in overcommit of available space and that may result in filesystem shutdowns. As percpu_counter_sum_all() iterates all possible CPUs rather than just those online or even those present, the mask can include CPUs that aren't even installed in the machine, or in the case of machines that can hot-plug CPU capable nodes, even have physical sockets present in the machine. Fundamentally, this race condition is caused by the CPU being offlined being removed from the cpu_online_mask before the notifier that cleans up per-cpu state is run. Hence percpu_counter_sum() will not sum the count for a cpu currently being taken offline, regardless of whether the notifier has run or not. This is the root cause of the bug. The percpu counter notifier iterates all the registered counters, locks the counter and moves the percpu count to the global sum. This is serialised against other operations that move the percpu counter to the global sum as well as percpu_counter_sum() operations that sum the percpu counts while holding the counter lock. Hence the notifier is safe to run concurrently with sum operations, and the only thing we actually need to care about is that percpu_counter_sum() iterates dying CPUs. That's trivial to do, and when there are no CPUs dying, it has no addition overhead except for a cpumask_or() operation. This change makes percpu_counter_sum() always do the right thing in the presence of CPU hot unplug events and makes percpu_counter_sum_all() unnecessary. This, in turn, means that filesystems like XFS, ext4, and btrfs don't have to work out when they should use percpu_counter_sum() vs percpu_counter_sum_all() in their space accounting algorithms Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- lib/percpu_counter.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index dba56c5c1837..0e096311e0c0 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -131,7 +131,7 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; - for_each_cpu(cpu, cpu_mask) { + for_each_cpu_or(cpu, cpu_online_mask, cpu_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } @@ -141,11 +141,20 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, /* * Add up all the per-cpu counts, return the result. This is a more accurate - * but much slower version of percpu_counter_read_positive() + * but much slower version of percpu_counter_read_positive(). + * + * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums + * from CPUs that are in the process of being taken offline. Dying cpus have + * been removed from the online mask, but may not have had the hotplug dead + * notifier called to fold the percpu count back into the global counter sum. + * By including dying CPUs in the iteration mask, we avoid this race condition + * so __percpu_counter_sum() just does the right thing when CPUs are being taken + * offline. */ s64 __percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum_mask(fbc, cpu_online_mask); + + return __percpu_counter_sum_mask(fbc, cpu_dying_mask); } EXPORT_SYMBOL(__percpu_counter_sum); -- cgit From 7ba85fba47bd89618fdb7dc322bdf823b1b56efb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:03 -0700 Subject: fork: remove use of percpu_counter_sum_all This effectively reverts the change made in commit f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") as the race condition percpu_counter_sum_all() was invented to avoid is now handled directly in percpu_counter_sum() and nobody needs to care about summing racing with cpu unplug anymore. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- kernel/fork.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index f68954d05e89..bcc3085e79ef 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -755,11 +755,6 @@ static void check_mm(struct mm_struct *mm) for (i = 0; i < NR_MM_COUNTERS; i++) { long x = percpu_counter_sum(&mm->rss_stat[i]); - if (likely(!x)) - continue; - - /* Making sure this is not due to race with CPU offlining. */ - x = percpu_counter_sum_all(&mm->rss_stat[i]); if (unlikely(x)) pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", mm, resident_page_types[i], x); -- cgit From e9b60c7f97130795c7aa81a649ae4b93a172a277 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:03 -0700 Subject: pcpcntr: remove percpu_counter_sum_all() percpu_counter_sum_all() is now redundant as the race condition it was invented to handle is now dealt with by percpu_counter_sum() directly and all users of percpu_counter_sum_all() have been removed. Remove it. This effectively reverts the changes made in f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") except for the cpumask iteration that fixes percpu_counter_sum() made earlier in this series. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/percpu_counter.h | 6 ------ lib/percpu_counter.c | 40 +++++++++++----------------------------- 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 521a733e21a9..75b73c83bc9d 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -45,7 +45,6 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -s64 percpu_counter_sum_all(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); @@ -196,11 +195,6 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } -static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return percpu_counter_read(fbc); -} - static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 0e096311e0c0..5004463c4f9f 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -122,23 +122,6 @@ void percpu_counter_sync(struct percpu_counter *fbc) } EXPORT_SYMBOL(percpu_counter_sync); -static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, - const struct cpumask *cpu_mask) -{ - s64 ret; - int cpu; - unsigned long flags; - - raw_spin_lock_irqsave(&fbc->lock, flags); - ret = fbc->count; - for_each_cpu_or(cpu, cpu_online_mask, cpu_mask) { - s32 *pcount = per_cpu_ptr(fbc->counters, cpu); - ret += *pcount; - } - raw_spin_unlock_irqrestore(&fbc->lock, flags); - return ret; -} - /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive(). @@ -153,22 +136,21 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, */ s64 __percpu_counter_sum(struct percpu_counter *fbc) { + s64 ret; + int cpu; + unsigned long flags; - return __percpu_counter_sum_mask(fbc, cpu_dying_mask); + raw_spin_lock_irqsave(&fbc->lock, flags); + ret = fbc->count; + for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } + raw_spin_unlock_irqrestore(&fbc->lock, flags); + return ret; } EXPORT_SYMBOL(__percpu_counter_sum); -/* - * This is slower version of percpu_counter_sum as it traverses all possible - * cpus. Use this only in the cases where accurate data is needed in the - * presense of CPUs getting offlined. - */ -s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return __percpu_counter_sum_mask(fbc, cpu_possible_mask); -} -EXPORT_SYMBOL(percpu_counter_sum_all); - int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { -- cgit From e400be674a1a40e9dcb2e95f84d6c1fd2d88f31d Mon Sep 17 00:00:00 2001 From: Sung-hun Kim Date: Tue, 14 Mar 2023 10:37:07 +0900 Subject: tracing: Make splice_read available again Since the commit 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") is applied to the kernel, splice() and sendfile() calls on the trace file (/sys/kernel/debug/tracing /trace) return EINVAL. This patch restores these system calls by initializing splice_read in file_operations of the trace file. This patch only enables such functionalities for the read case. Link: https://lore.kernel.org/linux-trace-kernel/20230314013707.28814-1-sfoon.kim@samsung.com Cc: stable@vger.kernel.org Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Signed-off-by: Sung-hun Kim Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fbb602a8b64b..4e9a7a952025 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5164,6 +5164,8 @@ loff_t tracing_lseek(struct file *file, loff_t offset, int whence) static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, + .read_iter = seq_read_iter, + .splice_read = generic_file_splice_read, .write = tracing_write_stub, .llseek = tracing_lseek, .release = tracing_release, -- cgit From a98151ad53b53f010ee364ec2fd06445b328578b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 15 Mar 2023 15:24:46 +0100 Subject: ring-buffer: remove obsolete comment for free_buffer_page() The comment refers to mm/slob.c which is being removed. It comes from commit ed56829cb319 ("ring_buffer: reset buffer page when freeing") and according to Steven the borrowed code was a page mapcount and mapping reset, which was later removed by commit e4c2ce82ca27 ("ring_buffer: allocate buffer page pointer"). Thus the comment is not accurate anyway, remove it. Link: https://lore.kernel.org/linux-trace-kernel/20230315142446.27040-1-vbabka@suse.cz Cc: Masami Hiramatsu Cc: Ingo Molnar Reported-by: Mike Rapoport Suggested-by: Steven Rostedt (Google) Fixes: e4c2ce82ca27 ("ring_buffer: allocate buffer page pointer") Signed-off-by: Vlastimil Babka Reviewed-by: Mukesh Ojha Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 071184324d18..3c7cd135333f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,10 +354,6 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } -/* - * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing - * this issue out. - */ static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); -- cgit From 71c7a30442b724717a30d5e7d1662ba4904eb3d4 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Thu, 16 Mar 2023 16:45:35 +0200 Subject: tracing/hwlat: Replace sched_setaffinity with set_cpus_allowed_ptr There is a problem with the behavior of hwlat in a container, resulting in incorrect output. A warning message is generated: "cpumask changed while in round-robin mode, switching to mode none", and the tracing_cpumask is ignored. This issue arises because the kernel thread, hwlatd, is not a part of the container, and the function sched_setaffinity is unable to locate it using its PID. Additionally, the task_struct of hwlatd is already known. Ultimately, the function set_cpus_allowed_ptr achieves the same outcome as sched_setaffinity, but employs task_struct instead of PID. Test case: # cd /sys/kernel/tracing # echo 0 > tracing_on # echo round-robin > hwlat_detector/mode # echo hwlat > current_tracer # unshare --fork --pid bash -c 'echo 1 > tracing_on' # dmesg -c Actual behavior: [573502.809060] hwlat_detector: cpumask changed while in round-robin mode, switching to mode none Link: https://lore.kernel.org/linux-trace-kernel/20230316144535.1004952-1-costa.shul@redhat.com Cc: Masami Hiramatsu Fixes: 0330f7aa8ee63 ("tracing: Have hwlat trace migrate across tracing_cpumask CPUs") Signed-off-by: Costa Shulyupin Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_hwlat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index c4945f8adc11..2f37a6e68aa9 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -339,7 +339,7 @@ static void move_to_next_cpu(void) cpumask_clear(current_mask); cpumask_set_cpu(next_cpu, current_mask); - sched_setaffinity(0, current_mask); + set_cpus_allowed_ptr(current, current_mask); return; change_mode: @@ -446,7 +446,7 @@ static int start_single_kthread(struct trace_array *tr) } - sched_setaffinity(kthread->pid, current_mask); + set_cpus_allowed_ptr(kthread, current_mask); kdata->kthread = kthread; wake_up_process(kthread); -- cgit From e8d018dd0257f744ca50a729e3d042cf2ec9da65 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Mar 2023 13:27:55 -0700 Subject: Linux 6.3-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b1506df8b8d8..a2c310df2145 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From 1716efdb07938bd6510e1127d02012799112c433 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Mar 2023 11:20:49 -0600 Subject: thunderbolt: Use const qualifier for `ring_interrupt_index` `ring_interrupt_index` doesn't change the data for `ring` so mark it as const. This is needed by the following patch that disables interrupt auto clear for rings. Cc: Sanju Mehta Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 4dce2edd86ea..fdc0c3ba2ef0 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -46,7 +46,7 @@ #define QUIRK_AUTO_CLEAR_INT BIT(0) #define QUIRK_E2E BIT(1) -static int ring_interrupt_index(struct tb_ring *ring) +static int ring_interrupt_index(const struct tb_ring *ring) { int bit = ring->hop; if (!ring->is_tx) -- cgit From 468c49f44759720a312e52d44a71c3949ed63d7c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Mar 2023 11:20:50 -0600 Subject: thunderbolt: Disable interrupt auto clear for rings When interrupt auto clear is programmed, any read to the interrupt status register will clear all interrupts. If two interrupts have come in before one can be serviced then this will cause lost interrupts. On AMD USB4 routers this has manifested in odd problems particularly with long strings of control tranfers such as reading the DROM via bit banging. Instead of clearing interrupts automatically, clear the bit corresponding to the given ring's interrupt in the ISR. Fixes: 7a1808f82a37 ("thunderbolt: Handle ring interrupt by reading interrupt status register") Cc: Sanju Mehta Cc: stable@vger.kernel.org Tested-by: Anson Tsao Signed-off-by: Mario Limonciello Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 40 +++++++++++++++++++++++++--------------- drivers/thunderbolt/nhi_regs.h | 6 ++++-- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index fdc0c3ba2ef0..318d20bd5b69 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -71,24 +71,31 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) u32 step, shift, ivr, misc; void __iomem *ivr_base; int index; + int bit; if (ring->is_tx) index = ring->hop; else index = ring->hop + ring->nhi->hop_count; - if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) { - /* - * Ask the hardware to clear interrupt status - * bits automatically since we already know - * which interrupt was triggered. - */ - misc = ioread32(ring->nhi->iobase + REG_DMA_MISC); - if (!(misc & REG_DMA_MISC_INT_AUTO_CLEAR)) { - misc |= REG_DMA_MISC_INT_AUTO_CLEAR; - iowrite32(misc, ring->nhi->iobase + REG_DMA_MISC); - } - } + /* + * Intel routers support a bit that isn't part of + * the USB4 spec to ask the hardware to clear + * interrupt status bits automatically since + * we already know which interrupt was triggered. + * + * Other routers explicitly disable auto-clear + * to prevent conditions that may occur where two + * MSIX interrupts are simultaneously active and + * reading the register clears both of them. + */ + misc = ioread32(ring->nhi->iobase + REG_DMA_MISC); + if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) + bit = REG_DMA_MISC_INT_AUTO_CLEAR; + else + bit = REG_DMA_MISC_DISABLE_AUTO_CLEAR; + if (!(misc & bit)) + iowrite32(misc | bit, ring->nhi->iobase + REG_DMA_MISC); ivr_base = ring->nhi->iobase + REG_INT_VEC_ALLOC_BASE; step = index / REG_INT_VEC_ALLOC_REGS * REG_INT_VEC_ALLOC_BITS; @@ -393,14 +400,17 @@ EXPORT_SYMBOL_GPL(tb_ring_poll_complete); static void ring_clear_msix(const struct tb_ring *ring) { + int bit; + if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) return; + bit = ring_interrupt_index(ring) & 31; if (ring->is_tx) - ioread32(ring->nhi->iobase + REG_RING_NOTIFY_BASE); + iowrite32(BIT(bit), ring->nhi->iobase + REG_RING_INT_CLEAR); else - ioread32(ring->nhi->iobase + REG_RING_NOTIFY_BASE + - 4 * (ring->nhi->hop_count / 32)); + iowrite32(BIT(bit), ring->nhi->iobase + REG_RING_INT_CLEAR + + 4 * (ring->nhi->hop_count / 32)); } static irqreturn_t ring_msix(int irq, void *data) diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 0d4970dcef84..faef165a919c 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -77,12 +77,13 @@ struct ring_desc { /* * three bitfields: tx, rx, rx overflow - * Every bitfield contains one bit for every hop (REG_HOP_COUNT). Registers are - * cleared on read. New interrupts are fired only after ALL registers have been + * Every bitfield contains one bit for every hop (REG_HOP_COUNT). + * New interrupts are fired only after ALL registers have been * read (even those containing only disabled rings). */ #define REG_RING_NOTIFY_BASE 0x37800 #define RING_NOTIFY_REG_COUNT(nhi) ((31 + 3 * nhi->hop_count) / 32) +#define REG_RING_INT_CLEAR 0x37808 /* * two bitfields: rx, tx @@ -105,6 +106,7 @@ struct ring_desc { #define REG_DMA_MISC 0x39864 #define REG_DMA_MISC_INT_AUTO_CLEAR BIT(2) +#define REG_DMA_MISC_DISABLE_AUTO_CLEAR BIT(17) #define REG_INMAIL_DATA 0x39900 -- cgit From 364ac7863fc161841e86388884bb7d5f4048031a Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Wed, 1 Mar 2023 12:10:49 -0800 Subject: drm/i915/mtl: Fix Wa_16015201720 implementation The commit 2357f2b271ad ("drm/i915/mtl: Initial display workarounds") extended the workaround Wa_16015201720 to MTL. However the registers that the original WA implemented moved for MTL. Implement the workaround with the correct register. v3: Skip clock gating for pipe C, D DMC's and fix the title Fixes: 2357f2b271ad ("drm/i915/mtl: Initial display workarounds") Cc: Matt Atwood Cc: Lucas De Marchi Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230301201053.928709-2-radhakrishna.sripada@intel.com (cherry picked from commit 0188be507b973e36f637ba010a369057c8cb7282) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dmc.c | 26 +++++++++++++++++++++----- drivers/gpu/drm/i915/i915_reg.h | 8 +++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 257aa2b7cf20..3485d5e6dd3c 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -384,15 +384,12 @@ static void disable_all_event_handlers(struct drm_i915_private *i915) } } -static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) +static void adlp_pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) { enum pipe pipe; - if (DISPLAY_VER(i915) < 13) - return; - /* - * Wa_16015201720:adl-p,dg2, mtl + * Wa_16015201720:adl-p,dg2 * The WA requires clock gating to be disabled all the time * for pipe A and B. * For pipe C and D clock gating needs to be disabled only @@ -408,6 +405,25 @@ static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) PIPEDMC_GATING_DIS, 0); } +static void mtl_pipedmc_clock_gating_wa(struct drm_i915_private *i915) +{ + /* + * Wa_16015201720 + * The WA requires clock gating to be disabled all the time + * for pipe A and B. + */ + intel_de_rmw(i915, GEN9_CLKGATE_DIS_0, 0, + MTL_PIPEDMC_GATING_DIS_A | MTL_PIPEDMC_GATING_DIS_B); +} + +static void pipedmc_clock_gating_wa(struct drm_i915_private *i915, bool enable) +{ + if (DISPLAY_VER(i915) >= 14 && enable) + mtl_pipedmc_clock_gating_wa(i915); + else if (DISPLAY_VER(i915) == 13) + adlp_pipedmc_clock_gating_wa(i915, enable); +} + void intel_dmc_enable_pipe(struct drm_i915_private *i915, enum pipe pipe) { if (!has_dmc_id_fw(i915, PIPE_TO_DMC_ID(pipe))) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3b2642397b82..19b047875e62 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1786,9 +1786,11 @@ * GEN9 clock gating regs */ #define GEN9_CLKGATE_DIS_0 _MMIO(0x46530) -#define DARBF_GATING_DIS (1 << 27) -#define PWM2_GATING_DIS (1 << 14) -#define PWM1_GATING_DIS (1 << 13) +#define DARBF_GATING_DIS REG_BIT(27) +#define MTL_PIPEDMC_GATING_DIS_A REG_BIT(15) +#define MTL_PIPEDMC_GATING_DIS_B REG_BIT(14) +#define PWM2_GATING_DIS REG_BIT(14) +#define PWM1_GATING_DIS REG_BIT(13) #define GEN9_CLKGATE_DIS_3 _MMIO(0x46538) #define TGL_VRH_GATING_DIS REG_BIT(31) -- cgit From ed00eba03474adbf525ff03d69705d8c78b76456 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 1 Mar 2023 12:10:52 -0800 Subject: drm/i915/fbdev: lock the fbdev obj before vma pin lock the fbdev obj before calling into i915_vma_pin_iomap(). This helps to solve below : <7>[ 93.563308] i915 0000:00:02.0: [drm:intelfb_create [i915]] no BIOS fb, allocating a new one <4>[ 93.581844] ------------[ cut here ]------------ <4>[ 93.581855] WARNING: CPU: 12 PID: 625 at drivers/gpu/drm/i915/gem/i915_gem_pages.c:424 i915_gem_object_pin_map+0x152/0x1c0 [i915] Fixes: f0b6b01b3efe ("drm/i915: Add ww context to intel_dpt_pin, v2.") Cc: Chris Wilson Cc: Matthew Auld Cc: Maarten Lankhorst Signed-off-by: Tejas Upadhyay Signed-off-by: Radhakrishna Sripada Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20230301201053.928709-5-radhakrishna.sripada@intel.com (cherry picked from commit 561b31acfd65502a2cda2067513240fc57ccdbdc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_fbdev.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index f76b06293eb9..38825b30db16 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -210,6 +210,7 @@ static int intelfb_create(struct drm_fb_helper *helper, bool prealloc = false; void __iomem *vaddr; struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; int ret; mutex_lock(&ifbdev->hpd_lock); @@ -283,13 +284,24 @@ static int intelfb_create(struct drm_fb_helper *helper, info->fix.smem_len = vma->size; } - vaddr = i915_vma_pin_iomap(vma); - if (IS_ERR(vaddr)) { - drm_err(&dev_priv->drm, - "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); - ret = PTR_ERR(vaddr); - goto out_unpin; + for_i915_gem_ww(&ww, ret, false) { + ret = i915_gem_object_lock(vma->obj, &ww); + + if (ret) + continue; + + vaddr = i915_vma_pin_iomap(vma); + if (IS_ERR(vaddr)) { + drm_err(&dev_priv->drm, + "Failed to remap framebuffer into virtual memory (%pe)\n", vaddr); + ret = PTR_ERR(vaddr); + continue; + } } + + if (ret) + goto out_unpin; + info->screen_base = vaddr; info->screen_size = vma->size; -- cgit From 3a84f2c6c9558c554a90ec26ad25df92fc5e05b7 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Feb 2023 17:20:48 +0200 Subject: drm/i915: Preserve crtc_state->inherited during state clearing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_crtc_prepare_cleared_state() is unintentionally losing the "inherited" flag. This will happen if intel_initial_commit() is forced to go through the full modeset calculations for whatever reason. Afterwards the first real commit from userspace will not get forced to the full modeset path, and thus eg. audio state may not get recomputed properly. So if the monitor was already enabled during boot audio will not work until userspace itself does an explicit full modeset. Cc: stable@vger.kernel.org Tested-by: Lee Shawn C Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230223152048.20878-1-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar (cherry picked from commit 2553bacaf953b48c59357f5a622282bc0c45adae) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index d3994e2a7d63..208b1b5b15dd 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5145,6 +5145,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state, * only fields that are know to not cause problems are preserved. */ saved_state->uapi = crtc_state->uapi; + saved_state->inherited = crtc_state->inherited; saved_state->scaler_state = crtc_state->scaler_state; saved_state->shared_dpll = crtc_state->shared_dpll; saved_state->dpll_hw_state = crtc_state->dpll_hw_state; -- cgit From 088a422c3fa3ee9268d400078626b0c202cfe9dd Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Fri, 10 Mar 2023 11:43:39 +0530 Subject: drm/i915/mtl: Disable MC6 for MTL A step The Wa_14017073508 require to send Media Busy/Idle mailbox while accessing Media tile. As of now it is getting handled while __gt_unpark, __gt_park. But there are various corner cases where forcewakes are taken without __gt_unpark i.e. without sending Busy Mailbox especially during register reads. Forcewakes are taken without busy mailbox leads to GPU HANG. So bringing mailbox calls under forcewake calls are no feasible option as forcewake calls are atomic and mailbox calls are blocking. The issue already fixed in B step so disabling MC6 on A step and reverting previous commit which handles Wa_14017073508 Fixes: 8f70f1ec587d ("drm/i915/mtl: Add Wa_14017073508 for SAMedia") Cc: Rodrigo Vivi Signed-off-by: Badal Nilawar Reviewed-by: Rodrigo Vivi Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20230310061339.2495416-2-badal.nilawar@intel.com (cherry picked from commit 038a24835ab68f341eaa7a0e3bcc6ce0f9b22e17) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 27 --------------------------- drivers/gpu/drm/i915/gt/intel_rc6.c | 8 ++++++++ drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 13 +------------ drivers/gpu/drm/i915/i915_reg.h | 9 --------- 4 files changed, 9 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index cef3d6f5c34e..56b993f6e7dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -21,31 +21,10 @@ #include "intel_rc6.h" #include "intel_rps.h" #include "intel_wakeref.h" -#include "intel_pcode.h" #include "pxp/intel_pxp_pm.h" #define I915_GT_SUSPEND_IDLE_TIMEOUT (HZ / 2) -static void mtl_media_busy(struct intel_gt *gt) -{ - /* Wa_14017073508: mtl */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, - PCODE_MBOX_GT_STATE_MEDIA_BUSY, - PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); -} - -static void mtl_media_idle(struct intel_gt *gt) -{ - /* Wa_14017073508: mtl */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) - snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE, - PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY, - PCODE_MBOX_GT_STATE_DOMAIN_MEDIA, 0); -} - static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(>->user_wakeref); @@ -93,9 +72,6 @@ static int __gt_unpark(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); - /* Wa_14017073508: mtl */ - mtl_media_busy(gt); - /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -145,9 +121,6 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); - /* Wa_14017073508: mtl */ - mtl_media_idle(gt); - return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 5c91622dfca4..f4150f61f39c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -486,6 +486,7 @@ static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) static bool rc6_supported(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); if (!HAS_RC6(i915)) return false; @@ -502,6 +503,13 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; } + if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) && + gt->type == GT_MEDIA) { + drm_notice(&i915->drm, + "Media RC6 disabled on A step\n"); + return false; + } + return true; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c index b5855091cf6a..8f8dd05835c5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -11,20 +11,9 @@ static bool __guc_rc_supported(struct intel_guc *guc) { - struct intel_gt *gt = guc_to_gt(guc); - - /* - * Wa_14017073508: mtl - * Do not enable gucrc to avoid additional interrupts which - * may disrupt pcode wa. - */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) - return false; - /* GuC RC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(gt->i915) >= 12; + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; } static bool __guc_rc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 19b047875e62..747b53b567a0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6598,15 +6598,6 @@ /* XEHP_PCODE_FREQUENCY_CONFIG param2 */ #define PCODE_MBOX_DOMAIN_NONE 0x0 #define PCODE_MBOX_DOMAIN_MEDIAFF 0x3 - -/* Wa_14017210380: mtl */ -#define PCODE_MBOX_GT_STATE 0x50 -/* sub-commands (param1) */ -#define PCODE_MBOX_GT_STATE_MEDIA_BUSY 0x1 -#define PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY 0x2 -/* param2 */ -#define PCODE_MBOX_GT_STATE_DOMAIN_MEDIA 0x1 - #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 -- cgit From 8df23e4c4f72f4e201c28e6fb0a67e2dbf30628a Mon Sep 17 00:00:00 2001 From: John Harrison Date: Fri, 10 Mar 2023 22:37:12 -0800 Subject: drm/i915/guc: Fix missing ecodes Error captures are tagged with an 'ecode'. This is a pseduo-unique magic number that is meant to distinguish similar seeming bugs with different underlying signatures. It is a combination of two ring state registers. Unfortunately, the register state being used is only valid in execlist mode. In GuC mode, the register state exists in a separate list of arbitrary register address/value pairs rather than the named entry structure. So, search through that list to find the two exciting registers and copy them over to the structure's named members. v2: if else if instead of if if (Alan) Signed-off-by: John Harrison Reviewed-by: Alan Previn Fixes: a6f0f9cf330a ("drm/i915/guc: Plumb GuC-capture into gpu_coredump") Cc: Alan Previn Cc: Umesh Nerlige Ramappa Cc: Lucas De Marchi Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: Matt Roper Cc: Aravind Iddamsetty Cc: Michael Cheng Cc: Matthew Brost Cc: Bruce Chang Cc: Daniele Ceraolo Spurio Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230311063714.570389-2-John.C.Harrison@Intel.com (cherry picked from commit 9724ecdbb9ddd6da3260e4a442574b90fc75188a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index fc3b994626a4..710999d7189e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -1571,6 +1571,27 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf, #endif //CONFIG_DRM_I915_CAPTURE_ERROR +static void guc_capture_find_ecode(struct intel_engine_coredump *ee) +{ + struct gcap_reg_list_info *reginfo; + struct guc_mmio_reg *regs; + i915_reg_t reg_ipehr = RING_IPEHR(0); + i915_reg_t reg_instdone = RING_INSTDONE(0); + int i; + + if (!ee->guc_capture_node) + return; + + reginfo = ee->guc_capture_node->reginfo + GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE; + regs = reginfo->regs; + for (i = 0; i < reginfo->num_regs; i++) { + if (regs[i].offset == reg_ipehr.reg) + ee->ipehr = regs[i].value; + else if (regs[i].offset == reg_instdone.reg) + ee->instdone.instdone = regs[i].value; + } +} + void intel_guc_capture_free_node(struct intel_engine_coredump *ee) { if (!ee || !ee->guc_capture_node) @@ -1612,6 +1633,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt, list_del(&n->link); ee->guc_capture_node = n; ee->guc_capture = guc->capture; + guc_capture_find_ecode(ee); return; } } -- cgit From e92eb246feb9019b0b137706c934b8891cdfe3c2 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 14 Mar 2023 15:29:14 +0100 Subject: drm/i915/active: Fix missing debug object activation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit debug_active_activate() expected ref->count to be zero which is not true anymore as __i915_active_activate() calls debug_active_activate() after incrementing the count. v2: No need to check for "ref->count == 1" as __i915_active_activate() already make sure of that(Janusz). References: https://gitlab.freedesktop.org/drm/intel/-/issues/6733 Fixes: 04240e30ed06 ("drm/i915: Skip taking acquire mutex for no ref->active callback") Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Thomas Hellström Cc: Andi Shyti Cc: intel-gfx@lists.freedesktop.org Cc: Janusz Krzysztofik Cc: # v5.10+ Signed-off-by: Nirmoy Das Reviewed-by: Janusz Krzysztofik Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230313114613.9874-1-nirmoy.das@intel.com (cherry picked from commit bfad380c542438a9b642f8190b7fd37bc77e2723) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_active.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index a9fea115f2d2..8ef93889061a 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -92,8 +92,7 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { lockdep_assert_held(&ref->tree_lock); - if (!atomic_read(&ref->count)) /* before the first inc */ - debug_object_activate(ref, &active_debug_desc); + debug_object_activate(ref, &active_debug_desc); } static void debug_active_deactivate(struct i915_active *ref) -- cgit From 150784f9285e656373cf3953ef4a7663f1e1a0f2 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2023 16:19:20 +0100 Subject: drm/i915/gt: perform uc late init after probe error injection Probe pseudo errors should be injected only in places where real errors can be encountered, otherwise unwinding code can be broken. Placing intel_uc_init_late before i915_inject_probe_error violated this rule, resulting in following bug: __intel_gt_disable:655 GEM_BUG_ON(intel_gt_pm_is_awake(gt)) Fixes: 481d458caede ("drm/i915/guc: Add golden context to GuC ADS") Acked-by: Nirmoy Das Reviewed-by: Andi Shyti Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20230314151920.1065847-1-andrzej.hajda@intel.com (cherry picked from commit c4252a11131c7f27a158294241466e2a4e7ff94e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_gt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f0dbfc434e07..40d357cf8b04 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -737,12 +737,12 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt; - intel_uc_init_late(>->uc); - err = i915_inject_probe_error(gt->i915, -EIO); if (err) goto err_gt; + intel_uc_init_late(>->uc); + intel_migrate_init(>->migrate, gt); goto out_fw; -- cgit From f8d62aa8d24d9883df738e450bfe6be396e11979 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 14 Mar 2023 19:29:06 -0700 Subject: drm/i915: Fix format for perf_limit_reasons Use hex format so that it is easier to decode. Fixes: fe5979665f64 ("drm/i915/debugfs: Add perf_limit_reasons in debugfs") Signed-off-by: Vinay Belgaumkar Reviewed-by: Ashutosh Dixit Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20230315022906.2467408-1-vinay.belgaumkar@intel.com (cherry picked from commit 5e008ba67cb80084e99b40ccd46f9029ae421632) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 83df4cd5e06c..80dbbef86b1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -580,7 +580,7 @@ static bool perf_limit_reasons_eval(void *data) } DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get, - perf_limit_reasons_clear, "%llu\n"); + perf_limit_reasons_clear, "0x%llx\n"); void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root) { -- cgit From 59ad01c786a4c94afacc7feb0ab97bf8d6672a46 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Sat, 11 Mar 2023 01:58:25 +0200 Subject: drm/i915: Update vblank timestamping stuff on seamless M/N change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we change the M/N values seamlessly during a fastset we should also update the vblank timestamping stuff to make sure the vblank timestamp corrections/guesstimations come out exact. Note that only crtc_clock and framedur_ns can actually end up changing here during fastsets. Everything else we touch can only change during full modesets. Technically we should try to do this exactly at the start of vblank, but that would require some kind of double buffering scheme. Let's skip that for now and just update things right after the commit has been submitted to the hardware. This means the information will be properly up to date when the vblank irq handler goes to work. Only if someone ends up querying some vblanky stuff in between the commit and start of vblank may we see a slight discrepancy. Also this same problem really exists for the DRRS downclocking stuff. But as that is supposed to be more or less transparent to the user, and it only drops to low gear after a long delay (1 sec currently) we probably don't have to worry about it. Any time something is actively submitting updates DRRS will remain in high gear and so the timestamping constants will match the hardware state. Reviewed-by: Jani Nikula Reviewed-by: Mitul Golani Fixes: e6f29923c048 ("drm/i915: Allow M/N change during fastset on bdw+") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230310235828.17439-1-ville.syrjala@linux.intel.com (cherry picked from commit 8cb1f95cca68421b08333175719fdd3615372ca8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_crtc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 82be0fbe9934..d5b5d40ed817 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -683,6 +683,14 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) */ intel_vrr_send_push(new_crtc_state); + /* + * Seamless M/N update may need to update frame timings. + * + * FIXME Should be synchronized with the start of vblank somehow... + */ + if (new_crtc_state->seamless_m_n && intel_crtc_needs_fastset(new_crtc_state)) + intel_crtc_update_active_timings(new_crtc_state); + local_irq_enable(); if (intel_vgpu_active(dev_priv)) -- cgit From a8eff03545d4cef12ae66a1905627c1818a0f81a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 18 Mar 2023 01:19:00 +0200 Subject: net: dsa: report rx_bytes unadjusted for ETH_HLEN We collect the software statistics counters for RX bytes (reported to /proc/net/dev and to ethtool -S $dev | grep 'rx_bytes: ") at a time when skb->len has already been adjusted by the eth_type_trans() -> skb_pull_inline(skb, ETH_HLEN) call to exclude the L2 header. This means that when connecting 2 DSA interfaces back to back and sending 1 packet with length 100, the sending interface will report tx_bytes as incrementing by 100, and the receiving interface will report rx_bytes as incrementing by 86. Since accounting for that in scripts is quirky and is something that would be DSA-specific behavior (requiring users to know that they are running on a DSA interface in the first place), the proposal is that we treat it as a bug and fix it. This design bug has always existed in DSA, according to my analysis: commit 91da11f870f0 ("net: Distributed Switch Architecture protocol support") also updates skb->dev->stats.rx_bytes += skb->len after the eth_type_trans() call. Technically, prior to Florian's commit a86d8becc3f0 ("net: dsa: Factor bottom tag receive functions"), each and every vendor-specific tagging protocol driver open-coded the same bug, until the buggy code was consolidated into something resembling what can be seen now. So each and every driver should have its own Fixes: tag, because of their different histories until the convergence point. I'm not going to do that, for the sake of simplicity, but just blame the oldest appearance of buggy code. There are 2 ways to fix the problem. One is the obvious way, and the other is how I ended up doing it. Obvious would have been to move dev_sw_netstats_rx_add() one line above eth_type_trans(), and below skb_push(skb, ETH_HLEN). But DSA processing is not as simple as that. We count the bytes after removing everything DSA-related from the packet, to emulate what the packet's length was, on the wire, when the user port received it. When eth_type_trans() executes, dsa_untag_bridge_pvid() has not run yet, so in case the switch driver requests this behavior - commit 412a1526d067 ("net: dsa: untag the bridge pvid from rx skbs") has the details - the obvious variant of the fix wouldn't have worked, because the positioning there would have also counted the not-yet-stripped VLAN header length, something which is absent from the packet as seen on the wire (there it may be untagged, whereas software will see it as PVID-tagged). Fixes: f613ed665bb3 ("net: dsa: Add support for 64-bit statistics") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag.c b/net/dsa/tag.c index b2fba1a003ce..5105a5ff58fa 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -114,7 +114,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, skb = nskb; } - dev_sw_netstats_rx_add(skb->dev, skb->len); + dev_sw_netstats_rx_add(skb->dev, skb->len + ETH_HLEN); if (dsa_skb_defer_rx_timestamp(p, skb)) return 0; -- cgit From 6b6bc5b8bd2d4ca9e1efa9ae0f98a0b0687ace75 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sat, 18 Mar 2023 16:05:26 +0800 Subject: net: qcom/emac: Fix use after free bug in emac_remove due to race condition In emac_probe, &adpt->work_thread is bound with emac_work_thread. Then it will be started by timeout handler emac_tx_timeout or a IRQ handler emac_isr. If we remove the driver which will call emac_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: Fix it by finishing the work before cleanup in the emac_remove and disable timeout response. CPU0 CPU1 |emac_work_thread emac_remove | free_netdev | kfree(netdev); | |emac_reinit_locked |emac_mac_down |//use netdev Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Signed-off-by: Zheng Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 3115b2c12898..eaa50050aa0b 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -724,9 +724,15 @@ static int emac_remove(struct platform_device *pdev) struct net_device *netdev = dev_get_drvdata(&pdev->dev); struct emac_adapter *adpt = netdev_priv(netdev); + netif_carrier_off(netdev); + netif_tx_disable(netdev); + unregister_netdev(netdev); netif_napi_del(&adpt->rx_q.napi); + free_irq(adpt->irq.irq, &adpt->irq); + cancel_work_sync(&adpt->work_thread); + emac_clks_teardown(adpt); put_device(&adpt->phydev->mdio.dev); -- cgit From 7f247f5a2c18b3f21206cdd51193df4f38e1b9f5 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Sat, 18 Mar 2023 10:25:52 +0100 Subject: net: usb: lan78xx: Limit packet length to skb->len Packet length retrieved from descriptor may be larger than the actual socket buffer length. In such case the cloned skb passed up the network stack will leak kernel memory contents. Additionally prevent integer underflow when size is less than ETH_FCS_LEN. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Szymon Heidrich Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 068488890d57..c458c030fadf 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3579,13 +3579,29 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb, size = (rx_cmd_a & RX_CMD_A_LEN_MASK_); align_count = (4 - ((size + RXW_PADDING) % 4)) % 4; + if (unlikely(size > skb->len)) { + netif_dbg(dev, rx_err, dev->net, + "size err rx_cmd_a=0x%08x\n", + rx_cmd_a); + return 0; + } + if (unlikely(rx_cmd_a & RX_CMD_A_RED_)) { netif_dbg(dev, rx_err, dev->net, "Error rx_cmd_a=0x%08x", rx_cmd_a); } else { - u32 frame_len = size - ETH_FCS_LEN; + u32 frame_len; struct sk_buff *skb2; + if (unlikely(size < ETH_FCS_LEN)) { + netif_dbg(dev, rx_err, dev->net, + "size err rx_cmd_a=0x%08x\n", + rx_cmd_a); + return 0; + } + + frame_len = size - ETH_FCS_LEN; + skb2 = napi_alloc_skb(&dev->napi, frame_len); if (!skb2) return 0; -- cgit From 7d722c9802d4bd61a1f1614e07413b6a5fac382d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 18 Mar 2023 09:13:42 -0400 Subject: usb: plusb: remove unused pl_clear_QuickLink_features function clang with W=1 reports drivers/net/usb/plusb.c:65:1: error: unused function 'pl_clear_QuickLink_features' [-Werror,-Wunused-function] pl_clear_QuickLink_features(struct usbnet *dev, int val) ^ This static function is not used, so remove it. Signed-off-by: Tom Rix Signed-off-by: David S. Miller --- drivers/net/usb/plusb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 7a2b0094de51..2894114858a2 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -61,12 +61,6 @@ pl_vendor_req(struct usbnet *dev, u8 req, u8 val, u8 index) val, index, NULL, 0); } -static inline int -pl_clear_QuickLink_features(struct usbnet *dev, int val) -{ - return pl_vendor_req(dev, 1, (u8) val, 0); -} - static inline int pl_set_QuickLink_features(struct usbnet *dev, int val) { -- cgit From 19b3bb51c3bc288b3f2c6f8c4450b0f548320625 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Sat, 18 Mar 2023 17:39:16 +0000 Subject: net/ps3_gelic_net: Fix RX sk_buff length The Gelic Ethernet device needs to have the RX sk_buffs aligned to GELIC_NET_RXBUF_ALIGN, and also the length of the RX sk_buffs must be a multiple of GELIC_NET_RXBUF_ALIGN. The current Gelic Ethernet driver was not allocating sk_buffs large enough to allow for this alignment. Also, correct the maximum and minimum MTU sizes, and add a new preprocessor macro for the maximum frame size, GELIC_NET_MAX_FRAME. Fixes various randomly occurring runtime network errors. Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3") Signed-off-by: Geoff Levand Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 19 ++++++++++--------- drivers/net/ethernet/toshiba/ps3_gelic_net.h | 5 +++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index cf8de8a7a8a1..dffd664e65f4 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -365,26 +365,27 @@ iommu_error: * * allocates a new rx skb, iommu-maps it and attaches it to the descriptor. * Activate the descriptor state-wise + * + * Gelic RX sk_buffs must be aligned to GELIC_NET_RXBUF_ALIGN and the length + * must be a multiple of GELIC_NET_RXBUF_ALIGN. */ static int gelic_descr_prepare_rx(struct gelic_card *card, struct gelic_descr *descr) { + static const unsigned int rx_skb_size = + ALIGN(GELIC_NET_MAX_FRAME, GELIC_NET_RXBUF_ALIGN) + + GELIC_NET_RXBUF_ALIGN - 1; int offset; - unsigned int bufsize; if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE) dev_info(ctodev(card), "%s: ERROR status\n", __func__); - /* we need to round up the buffer size to a multiple of 128 */ - bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN); - /* and we need to have it 128 byte aligned, therefore we allocate a - * bit more */ - descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1); + descr->skb = netdev_alloc_skb(*card->netdev, rx_skb_size); if (!descr->skb) { descr->buf_addr = 0; /* tell DMAC don't touch memory */ return -ENOMEM; } - descr->buf_size = cpu_to_be32(bufsize); + descr->buf_size = cpu_to_be32(rx_skb_size); descr->dmac_cmd_status = 0; descr->result_size = 0; descr->valid_size = 0; @@ -397,7 +398,7 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, /* io-mmu-map the skb */ descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card), descr->skb->data, - GELIC_NET_MAX_MTU, + GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE)); if (!descr->buf_addr) { dev_kfree_skb_any(descr->skb); @@ -915,7 +916,7 @@ static void gelic_net_pass_skb_up(struct gelic_descr *descr, data_error = be32_to_cpu(descr->data_error); /* unmap skb buffer */ dma_unmap_single(ctodev(card), be32_to_cpu(descr->buf_addr), - GELIC_NET_MAX_MTU, + GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE); skb_put(skb, be32_to_cpu(descr->valid_size)? diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 68f324ed4eaf..0d98defb011e 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -19,8 +19,9 @@ #define GELIC_NET_RX_DESCRIPTORS 128 /* num of descriptors */ #define GELIC_NET_TX_DESCRIPTORS 128 /* num of descriptors */ -#define GELIC_NET_MAX_MTU VLAN_ETH_FRAME_LEN -#define GELIC_NET_MIN_MTU VLAN_ETH_ZLEN +#define GELIC_NET_MAX_FRAME 2312 +#define GELIC_NET_MAX_MTU 2294 +#define GELIC_NET_MIN_MTU 64 #define GELIC_NET_RXBUF_ALIGN 128 #define GELIC_CARD_RX_CSUM_DEFAULT 1 /* hw chksum */ #define GELIC_NET_WATCHDOG_TIMEOUT 5*HZ -- cgit From bebe933d35a63d4f042fbf4dce4f22e689ba0fcd Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Sat, 18 Mar 2023 17:39:16 +0000 Subject: net/ps3_gelic_net: Use dma_mapping_error The current Gelic Etherenet driver was checking the return value of its dma_map_single call, and not using the dma_mapping_error() routine. Fixes runtime problems like these: DMA-API: ps3_gelic_driver sb_05: device driver failed to check map error WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:1027 .check_unmap+0x888/0x8dc Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3") Reviewed-by: Alexander Duyck Signed-off-by: Geoff Levand Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index dffd664e65f4..9d535ae59626 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -317,15 +317,17 @@ static int gelic_card_init_chain(struct gelic_card *card, /* set up the hardware pointers in each descriptor */ for (i = 0; i < no; i++, descr++) { + dma_addr_t cpu_addr; + gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE); - descr->bus_addr = - dma_map_single(ctodev(card), descr, - GELIC_DESCR_SIZE, - DMA_BIDIRECTIONAL); - if (!descr->bus_addr) + cpu_addr = dma_map_single(ctodev(card), descr, + GELIC_DESCR_SIZE, DMA_BIDIRECTIONAL); + + if (dma_mapping_error(ctodev(card), cpu_addr)) goto iommu_error; + descr->bus_addr = cpu_to_be32(cpu_addr); descr->next = descr + 1; descr->prev = descr - 1; } @@ -375,6 +377,7 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, static const unsigned int rx_skb_size = ALIGN(GELIC_NET_MAX_FRAME, GELIC_NET_RXBUF_ALIGN) + GELIC_NET_RXBUF_ALIGN - 1; + dma_addr_t cpu_addr; int offset; if (gelic_descr_get_status(descr) != GELIC_DESCR_DMA_NOT_IN_USE) @@ -396,11 +399,10 @@ static int gelic_descr_prepare_rx(struct gelic_card *card, if (offset) skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset); /* io-mmu-map the skb */ - descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card), - descr->skb->data, - GELIC_NET_MAX_FRAME, - DMA_FROM_DEVICE)); - if (!descr->buf_addr) { + cpu_addr = dma_map_single(ctodev(card), descr->skb->data, + GELIC_NET_MAX_FRAME, DMA_FROM_DEVICE); + descr->buf_addr = cpu_to_be32(cpu_addr); + if (dma_mapping_error(ctodev(card), cpu_addr)) { dev_kfree_skb_any(descr->skb); descr->skb = NULL; dev_info(ctodev(card), @@ -780,7 +782,7 @@ static int gelic_descr_prepare_tx(struct gelic_card *card, buf = dma_map_single(ctodev(card), skb->data, skb->len, DMA_TO_DEVICE); - if (!buf) { + if (dma_mapping_error(ctodev(card), buf)) { dev_err(ctodev(card), "dma map 2 failed (%p, %i). Dropping packet\n", skb->data, skb->len); -- cgit From 22aa20e4c5dcbe6fdc480eb4fb27039b1f43217f Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 19 Mar 2023 07:03:00 -0700 Subject: Revert "drm/i915/hwmon: Enable PL1 power limit" This reverts commit ee892ea83d99610fa33bea612de058e0955eec3a. It was accidentally picked up for backporting. Revert. Cc: Jani Nikula Cc: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20230319140300.2892032-1-ashutosh.dixit@intel.com --- drivers/gpu/drm/i915/i915_hwmon.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c index 4683a5b96eff..1225bc432f0d 100644 --- a/drivers/gpu/drm/i915/i915_hwmon.c +++ b/drivers/gpu/drm/i915/i915_hwmon.c @@ -687,11 +687,6 @@ hwm_get_preregistration_info(struct drm_i915_private *i915) for_each_gt(gt, i915, i) hwm_energy(&hwmon->ddat_gt[i], &energy); } - - /* Enable PL1 power limit */ - if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit)) - hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit, - PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN); } void i915_hwmon_register(struct drm_i915_private *i915) -- cgit From 58cdfe6f58b35f17f56386f5fcf937168a423ad1 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 15 Mar 2023 18:04:50 -0400 Subject: thunderbolt: Rename shadowed variables bit to interrupt_bit and auto_clear_bit cppcheck reports drivers/thunderbolt/nhi.c:74:7: style: Local variable 'bit' shadows outer variable [shadowVariable] int bit; ^ drivers/thunderbolt/nhi.c:66:6: note: Shadowed declaration int bit = ring_interrupt_index(ring) & 31; ^ drivers/thunderbolt/nhi.c:74:7: note: Shadow variable int bit; ^ For readablity rename the outer to interrupt_bit and the innner to auto_clear_bit. Fixes: 468c49f44759 ("thunderbolt: Disable interrupt auto clear for ring") Cc: stable@vger.kernel.org Signed-off-by: Tom Rix Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 318d20bd5b69..cfebec107f3f 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -63,15 +63,15 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) { int reg = REG_RING_INTERRUPT_BASE + ring_interrupt_index(ring) / 32 * 4; - int bit = ring_interrupt_index(ring) & 31; - int mask = 1 << bit; + int interrupt_bit = ring_interrupt_index(ring) & 31; + int mask = 1 << interrupt_bit; u32 old, new; if (ring->irq > 0) { u32 step, shift, ivr, misc; void __iomem *ivr_base; + int auto_clear_bit; int index; - int bit; if (ring->is_tx) index = ring->hop; @@ -91,11 +91,12 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) */ misc = ioread32(ring->nhi->iobase + REG_DMA_MISC); if (ring->nhi->quirks & QUIRK_AUTO_CLEAR_INT) - bit = REG_DMA_MISC_INT_AUTO_CLEAR; + auto_clear_bit = REG_DMA_MISC_INT_AUTO_CLEAR; else - bit = REG_DMA_MISC_DISABLE_AUTO_CLEAR; - if (!(misc & bit)) - iowrite32(misc | bit, ring->nhi->iobase + REG_DMA_MISC); + auto_clear_bit = REG_DMA_MISC_DISABLE_AUTO_CLEAR; + if (!(misc & auto_clear_bit)) + iowrite32(misc | auto_clear_bit, + ring->nhi->iobase + REG_DMA_MISC); ivr_base = ring->nhi->iobase + REG_INT_VEC_ALLOC_BASE; step = index / REG_INT_VEC_ALLOC_REGS * REG_INT_VEC_ALLOC_BITS; @@ -115,7 +116,7 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active) dev_dbg(&ring->nhi->pdev->dev, "%s interrupt at register %#x bit %d (%#x -> %#x)\n", - active ? "enabling" : "disabling", reg, bit, old, new); + active ? "enabling" : "disabling", reg, interrupt_bit, old, new); if (new == old) dev_WARN(&ring->nhi->pdev->dev, -- cgit From 5e7a3bf65db57461d0f47955248fcadf37321a74 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Mar 2023 16:59:46 +0100 Subject: ACPI: video: Add backlight=native DMI quirk for Acer Aspire 3830TG The Acer Aspire 3830TG predates Windows 8, so it defaults to using acpi_video# for backlight control, but this is non functional on this model. Add a DMI quirk to use the native backlight interface which does work properly. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 14d6d81e536f..fd7cbce8076e 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -495,6 +495,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), }, }, + { + .callback = video_detect_force_native, + /* Acer Aspire 3830TG */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 3830TG"), + }, + }, { .callback = video_detect_force_native, /* Acer Aspire 4810T */ -- cgit From 7d31677bb7b1944ac89e9155110dc1b9acbb3895 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Jan 2023 23:14:00 +0100 Subject: gpu: host1x: fix uninitialized variable use The error handling for platform_get_irq() failing no longer works after a recent change, clang now points this out with a warning: drivers/gpu/host1x/dev.c:520:6: error: variable 'syncpt_irq' is uninitialized when used here [-Werror,-Wuninitialized] if (syncpt_irq < 0) ^~~~~~~~~~ Fix this by removing the variable and checking the correct error status. Fixes: 625d4ffb438c ("gpu: host1x: Rewrite syncpoint interrupt handling") Signed-off-by: Arnd Bergmann Reviewed-by: Jon Hunter Reviewed-by: Nick Desaulniers Reviewed-by: Mikko Perttunen Reviewed-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- drivers/gpu/host1x/dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 4872d183d860..aae2efeef503 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -487,7 +487,6 @@ static int host1x_get_resets(struct host1x *host) static int host1x_probe(struct platform_device *pdev) { struct host1x *host; - int syncpt_irq; int err; host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); @@ -517,8 +516,8 @@ static int host1x_probe(struct platform_device *pdev) } host->syncpt_irq = platform_get_irq(pdev, 0); - if (syncpt_irq < 0) - return syncpt_irq; + if (host->syncpt_irq < 0) + return host->syncpt_irq; mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); -- cgit From d7e673c2a900206bea3461a4b4ecc74ea930f80e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 20 Mar 2023 15:35:06 +0900 Subject: zonefs: Prevent uninitialized symbol 'size' warning In zonefs_file_dio_append(), initialize the variable size to 0 to prevent compilation and static code analizers warning such as: New smatch warnings: fs/zonefs/file.c:441 zonefs_file_dio_append() error: uninitialized symbol 'size'. The warning is a false positive as size is never actually used uninitialized. No functional change. Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202303191227.GL8Dprbi-lkp@intel.com/ Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani --- fs/zonefs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 738b0e28d74b..a545a6d9a32e 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -383,7 +383,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) struct block_device *bdev = inode->i_sb->s_bdev; unsigned int max = bdev_max_zone_append_sectors(bdev); struct bio *bio; - ssize_t size; + ssize_t size = 0; int nr_pages; ssize_t ret; -- cgit From 88b170088ad2c3e27086fe35769aa49f8a512564 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 20 Mar 2023 22:49:15 +0900 Subject: zonefs: Fix error message in zonefs_file_dio_append() Since the expected write location in a sequential file is always at the end of the file (append write), when an invalid write append location is detected in zonefs_file_dio_append(), print the invalid written location instead of the expected write location. Fixes: a608da3bd730 ("zonefs: Detect append writes at invalid locations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani --- fs/zonefs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index a545a6d9a32e..617e4f9db42e 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -426,7 +426,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) if (bio->bi_iter.bi_sector != wpsector) { zonefs_warn(inode->i_sb, "Corrupted write pointer %llu for zone at %llu\n", - wpsector, z->z_sector); + bio->bi_iter.bi_sector, z->z_sector); ret = -EIO; } } -- cgit From 9d2789ac9d60c049d26ef6d3005d9c94c5a559e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Mar 2023 20:01:25 -0600 Subject: block/io_uring: pass in issue_flags for uring_cmd task_work handling io_uring_cmd_done() currently assumes that the uring_lock is held when invoked, and while it generally is, this is not guaranteed. Pass in the issue_flags associated with it, so that we have IO_URING_F_UNLOCKED available to be able to lock the CQ ring appropriately when completing events. Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 31 ++++++++++++++++++------------- drivers/nvme/host/ioctl.c | 14 ++++++++------ include/linux/io_uring.h | 11 ++++++----- io_uring/uring_cmd.c | 10 ++++++---- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index fb5a557afde8..c73cc57ec547 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -715,7 +715,8 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, } } -static void ubq_complete_io_cmd(struct ublk_io *io, int res) +static void ubq_complete_io_cmd(struct ublk_io *io, int res, + unsigned issue_flags) { /* mark this cmd owned by ublksrv */ io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV; @@ -727,7 +728,7 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res) io->flags &= ~UBLK_IO_FLAG_ACTIVE; /* tell ublksrv one io request is coming */ - io_uring_cmd_done(io->cmd, res, 0); + io_uring_cmd_done(io->cmd, res, 0, issue_flags); } #define UBLK_REQUEUE_DELAY_MS 3 @@ -744,7 +745,8 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0); } -static inline void __ublk_rq_task_work(struct request *req) +static inline void __ublk_rq_task_work(struct request *req, + unsigned issue_flags) { struct ublk_queue *ubq = req->mq_hctx->driver_data; int tag = req->tag; @@ -782,7 +784,7 @@ static inline void __ublk_rq_task_work(struct request *req) pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n", __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags); - ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA); + ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags); return; } /* @@ -820,17 +822,18 @@ static inline void __ublk_rq_task_work(struct request *req) mapped_bytes >> 9; } - ubq_complete_io_cmd(io, UBLK_IO_RES_OK); + ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } -static inline void ublk_forward_io_cmds(struct ublk_queue *ubq) +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq, + unsigned issue_flags) { struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); struct ublk_rq_data *data, *tmp; io_cmds = llist_reverse_order(io_cmds); llist_for_each_entry_safe(data, tmp, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); + __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags); } static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) @@ -842,12 +845,12 @@ static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - ublk_forward_io_cmds(ubq); + ublk_forward_io_cmds(ubq, issue_flags); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -856,8 +859,9 @@ static void ublk_rq_task_work_fn(struct callback_head *work) struct ublk_rq_data, work); struct request *req = blk_mq_rq_from_pdu(data); struct ublk_queue *ubq = req->mq_hctx->driver_data; + unsigned issue_flags = IO_URING_F_UNLOCKED; - ublk_forward_io_cmds(ubq); + ublk_forward_io_cmds(ubq, issue_flags); } static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) @@ -1111,7 +1115,8 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) struct ublk_io *io = &ubq->ios[i]; if (io->flags & UBLK_IO_FLAG_ACTIVE) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0); + io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, + IO_URING_F_UNLOCKED); } /* all io commands are canceled */ @@ -1351,7 +1356,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) return -EIOCBQUEUED; out: - io_uring_cmd_done(cmd, ret, 0); + io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n", __func__, cmd_op, tag, ret, io->flags); return -EIOCBQUEUED; @@ -2234,7 +2239,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (ub) ublk_put_device(ub); out: - io_uring_cmd_done(cmd, ret, 0); + io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", __func__, ret, cmd->cmd_op, header->dev_id, header->queue_id); return -EIOCBQUEUED; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 723e7d5b778f..d24ea2e05156 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -464,7 +464,8 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; } -static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) +static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd, + unsigned issue_flags) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); struct request *req = pdu->req; @@ -485,17 +486,18 @@ static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) blk_rq_unmap_user(req->bio); blk_mq_free_request(req); - io_uring_cmd_done(ioucmd, status, result); + io_uring_cmd_done(ioucmd, status, result, issue_flags); } -static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, + unsigned issue_flags) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); if (pdu->bio) blk_rq_unmap_user(pdu->bio); - io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); + io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags); } static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, @@ -517,7 +519,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, * Otherwise, move the completion to task work. */ if (cookie != NULL && blk_rq_is_poll(req)) - nvme_uring_task_cb(ioucmd); + nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED); else io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); @@ -539,7 +541,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, * Otherwise, move the completion to task work. */ if (cookie != NULL && blk_rq_is_poll(req)) - nvme_uring_task_meta_cb(ioucmd); + nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED); else io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 934e5dd4ccc0..35b9328ca335 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -27,7 +27,7 @@ struct io_uring_cmd { const void *cmd; union { /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd); + void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); /* used for polled completion */ void *cookie; }; @@ -39,9 +39,10 @@ struct io_uring_cmd { #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, + unsigned issue_flags); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)); + void (*task_work_cb)(struct io_uring_cmd *, unsigned)); struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); @@ -72,11 +73,11 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, return -EOPNOTSUPP; } static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2) + ssize_t ret2, unsigned issue_flags) { } static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { } static inline struct sock *io_uring_get_socket(struct file *file) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 446a189b78b0..e535e8db01e3 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -15,12 +15,13 @@ static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); + unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED; - ioucmd->task_work_cb(ioucmd); + ioucmd->task_work_cb(ioucmd, issue_flags); } void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); @@ -42,7 +43,8 @@ static inline void io_req_set_cqe32_extra(struct io_kiocb *req, * Called by consumers of io_uring_cmd, if they originally returned * -EIOCBQUEUED upon receiving the command. */ -void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) +void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2, + unsigned issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); @@ -56,7 +58,7 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) /* order with io_iopoll_req_issued() checking ->iopoll_complete */ smp_store_release(&req->iopoll_completed, 1); else - io_req_complete_post(req, 0); + io_req_complete_post(req, issue_flags); } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -- cgit From 74e2e17ee1f8d8a0928b90434ad7e2df70f8483e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Mar 2023 11:13:49 -0600 Subject: io_uring/net: avoid sending -ECONNABORTED on repeated connection requests Since io_uring does nonblocking connect requests, if we do two repeated ones without having a listener, the second will get -ECONNABORTED rather than the expected -ECONNREFUSED. Treat -ECONNABORTED like a normal retry condition if we're nonblocking, if we haven't already seen it. Cc: stable@vger.kernel.org Fixes: 3fb1bd688172 ("io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT") Link: https://github.com/axboe/liburing/issues/828 Reported-by: Hui, Chunyang Signed-off-by: Jens Axboe --- io_uring/net.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index b7f190ca528e..4040cf093318 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -47,6 +47,7 @@ struct io_connect { struct sockaddr __user *addr; int addr_len; bool in_progress; + bool seen_econnaborted; }; struct io_sr_msg { @@ -1424,7 +1425,7 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); conn->addr_len = READ_ONCE(sqe->addr2); - conn->in_progress = false; + conn->in_progress = conn->seen_econnaborted = false; return 0; } @@ -1461,18 +1462,24 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) ret = __sys_connect_file(req->file, &io->address, connect->addr_len, file_flags); - if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { + if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) + && force_nonblock) { if (ret == -EINPROGRESS) { connect->in_progress = true; - } else { - if (req_has_async_data(req)) - return -EAGAIN; - if (io_alloc_async_data(req)) { - ret = -ENOMEM; + return -EAGAIN; + } + if (ret == -ECONNABORTED) { + if (connect->seen_econnaborted) goto out; - } - memcpy(req->async_data, &__io, sizeof(__io)); + connect->seen_econnaborted = true; + } + if (req_has_async_data(req)) + return -EAGAIN; + if (io_alloc_async_data(req)) { + ret = -ENOMEM; + goto out; } + memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } if (ret == -ERESTARTSYS) -- cgit From f038f3917baf04835ba2b7bcf2a04ac93fbf8a9c Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 17 Mar 2023 14:43:37 +0800 Subject: octeontx2-vf: Add missing free for alloc_percpu Add the free_percpu for the allocated "vf->hw.lmt_info" in order to avoid memory leak, same as the "pf->hw.lmt_info" in `drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c`. Fixes: 5c0512072f65 ("octeontx2-pf: cn10k: Use runtime allocated LMTLINE region") Signed-off-by: Jiasheng Jiang Reviewed-by: Michal Swiatkowski Acked-by: Geethasowjanya Akula Link: https://lore.kernel.org/r/20230317064337.18198-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7f8ffbf79cf7..ab126f8706c7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -709,6 +709,7 @@ err_unreg_netdev: err_ptp_destroy: otx2_ptp_destroy(vf); err_detach_rsrc: + free_percpu(vf->hw.lmt_info); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) qmem_free(vf->dev, vf->dync_lmt); otx2_detach_resources(&vf->mbox); @@ -762,6 +763,7 @@ static void otx2vf_remove(struct pci_dev *pdev) otx2_shutdown_tc(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); + free_percpu(vf->hw.lmt_info); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) qmem_free(vf->dev, vf->dync_lmt); otx2vf_vfaf_mbox_destroy(vf); -- cgit From 03aecb1acbcd7a660f97d645ca6c09d9de27ff9d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2023 10:52:18 +0100 Subject: drm: panel-orientation-quirks: Add quirk for Lenovo Yoga Book X90F Like the Windows Lenovo Yoga Book X91F/L the Android Lenovo Yoga Book X90F/L has a portrait 1200x1920 screen used in landscape mode, add a quirk for this. When the quirk for the X91F/L was initially added it was written to also apply to the X90F/L but this does not work because the Android version of the Yoga Book uses completely different DMI strings. Also adjust the X91F/L quirk to reflect that it only applies to the X91F/L models. Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230301095218.28457-1-hdegoede@redhat.com --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 5522d610c5cf..b1a38e6ce2f8 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -328,10 +328,17 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, - }, { /* Lenovo Yoga Book X90F / X91F / X91L */ + }, { /* Lenovo Yoga Book X90F / X90L */ .matches = { - /* Non exact match to match all versions */ - DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Yoga Book X91F / X91L */ + .matches = { + /* Non exact match to match F + L versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* Lenovo Yoga Tablet 2 830F / 830L */ -- cgit From f87d28673b71b35b248231a2086f9404afbb7f28 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Feb 2023 16:01:36 -0800 Subject: entry: Fix noinstr warning in __enter_from_user_mode() __enter_from_user_mode() is triggering noinstr warnings with CONFIG_DEBUG_PREEMPT due to its call of preempt_count_add() via ct_state(). The preemption disable isn't needed as interrupts are already disabled. And the context_tracking_enabled() check in ct_state() also isn't needed as that's already being done by the CT_WARN_ON(). Just use __ct_state() instead. Fixes the following warnings: vmlinux.o: warning: objtool: enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode+0xf9: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode_prepare+0xc7: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section Fixes: 171476775d32 ("context_tracking: Convert state to atomic_t") Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/d8955fa6d68dc955dda19baf13ae014ae27926f5.1677369694.git.jpoimboe@kernel.org --- include/linux/context_tracking.h | 1 + include/linux/context_tracking_state.h | 2 ++ kernel/entry/common.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d4afa8508a80..3a7909ed5498 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -96,6 +96,7 @@ static inline void user_exit_irqoff(void) { } static inline int exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } +static inline int __ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } #define CT_WARN_ON(cond) do { } while (0) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 4a4d56f77180..fdd537ea513f 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -46,7 +46,9 @@ struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); +#endif +#ifdef CONFIG_CONTEXT_TRACKING_USER static __always_inline int __ct_state(void) { return arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK; diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 846add8394c4..1314894d2efa 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -21,7 +21,7 @@ static __always_inline void __enter_from_user_mode(struct pt_regs *regs) arch_enter_from_user_mode(regs); lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(ct_state() != CONTEXT_USER); + CT_WARN_ON(__ct_state() != CONTEXT_USER); user_exit_irqoff(); instrumentation_begin(); -- cgit From a53ce18cacb477dd0513c607f187d16f0fa96f71 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 17 Mar 2023 17:08:10 +0100 Subject: sched/fair: Sanitize vruntime of entity being migrated Commit 829c1651e9c4 ("sched/fair: sanitize vruntime of entity being placed") fixes an overflowing bug, but ignore a case that se->exec_start is reset after a migration. For fixing this case, we delay the reset of se->exec_start after placing the entity which se->exec_start to detect long sleeping task. In order to take into account a possible divergence between the clock_task of 2 rqs, we increase the threshold to around 104 days. Fixes: 829c1651e9c4 ("sched/fair: sanitize vruntime of entity being placed") Originally-by: Zhang Qiao Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Zhang Qiao Link: https://lore.kernel.org/r/20230317160810.107988-1-vincent.guittot@linaro.org --- kernel/sched/core.c | 3 +++ kernel/sched/fair.c | 55 ++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 488655f2319f..0d18c3969f90 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2084,6 +2084,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) void activate_task(struct rq *rq, struct task_struct *p, int flags) { + if (task_on_rq_migrating(p)) + flags |= ENQUEUE_MIGRATED; + enqueue_task(rq, p, flags); p->on_rq = TASK_ON_RQ_QUEUED; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a1b1f855b96..6986ea31c984 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) #endif } +static inline bool entity_is_long_sleeper(struct sched_entity *se) +{ + struct cfs_rq *cfs_rq; + u64 sleep_time; + + if (se->exec_start == 0) + return false; + + cfs_rq = cfs_rq_of(se); + + sleep_time = rq_clock_task(rq_of(cfs_rq)); + + /* Happen while migrating because of clock task divergence */ + if (sleep_time <= se->exec_start) + return false; + + sleep_time -= se->exec_start; + if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD))) + return true; + + return false; +} + static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { u64 vruntime = cfs_rq->min_vruntime; - u64 sleep_time; /* * The 'current' period is already promised to the current tasks, @@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) /* * Pull vruntime of the entity being placed to the base level of - * cfs_rq, to prevent boosting it if placed backwards. If the entity - * slept for a long time, don't even try to compare its vruntime with - * the base as it may be too far off and the comparison may get - * inversed due to s64 overflow. - */ - sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start; - if ((s64)sleep_time > 60LL * NSEC_PER_SEC) + * cfs_rq, to prevent boosting it if placed backwards. + * However, min_vruntime can advance much faster than real time, with + * the extreme being when an entity with the minimal weight always runs + * on the cfs_rq. If the waking entity slept for a long time, its + * vruntime difference from min_vruntime may overflow s64 and their + * comparison may get inversed, so ignore the entity's original + * vruntime in that case. + * The maximal vruntime speedup is given by the ratio of normal to + * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES. + * When placing a migrated waking entity, its exec_start has been set + * from a different rq. In order to take into account a possible + * divergence between new and prev rq's clocks task because of irq and + * stolen time, we take an additional margin. + * So, cutting off on the sleep time of + * 2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days + * should be safe. + */ + if (entity_is_long_sleeper(se)) se->vruntime = vruntime; else se->vruntime = max_vruntime(se->vruntime, vruntime); @@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (flags & ENQUEUE_WAKEUP) place_entity(cfs_rq, se, 0); + /* Entity has migrated, no longer consider this task hot */ + if (flags & ENQUEUE_MIGRATED) + se->exec_start = 0; check_schedstat_required(); update_stats_enqueue_fair(cfs_rq, se, flags); @@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) /* Tell new CPU we are migrated */ se->avg.last_update_time = 0; - /* We have migrated, no longer consider this task hot */ - se->exec_start = 0; - update_scan_period(p, new_cpu); } -- cgit From 263f5ecaf7080513efc248ec739b6d9e00f4129f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Mar 2023 04:33:38 -0700 Subject: perf/x86/amd/core: Always clear status for idx The variable 'status' (which contains the unhandled overflow bits) is not being properly masked in some cases, displaying the following warning: WARNING: CPU: 156 PID: 475601 at arch/x86/events/amd/core.c:972 amd_pmu_v2_handle_irq+0x216/0x270 This seems to be happening because the loop is being continued before the status bit being unset, in case x86_perf_event_set_period() returns 0. This is also causing an inconsistency because the "handled" counter is incremented, but the status bit is not cleaned. Move the bit cleaning together above, together when the "handled" counter is incremented. Fixes: 7685665c390d ("perf/x86/amd/core: Add PerfMonV2 overflow handling") Signed-off-by: Breno Leitao Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sandipan Das Link: https://lore.kernel.org/r/20230321113338.1669660-1-leitao@debian.org --- arch/x86/events/amd/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 8c45b198b62f..bccea57dee81 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -923,6 +923,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) /* Event overflow */ handled++; + status &= ~mask; perf_sample_data_init(&data, 0, hwc->last_period); if (!x86_perf_event_set_period(event)) @@ -933,8 +934,6 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); - - status &= ~mask; } /* -- cgit From b416514054810cf2d2cc348ae477cea619b64da7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 15 Mar 2023 19:43:43 +0000 Subject: entry/rcu: Check TIF_RESCHED _after_ delayed RCU wake-up RCU sometimes needs to perform a delayed wake up for specific kthreads handling offloaded callbacks (RCU_NOCB). These wakeups are performed by timers and upon entry to idle (also to guest and to user on nohz_full). However the delayed wake-up on kernel exit is actually performed after the thread flags are fetched towards the fast path check for work to do on exit to user. As a result, and if there is no other pending work to do upon that kernel exit, the current task will resume to userspace with TIF_RESCHED set and the pending wake up ignored. Fix this with fetching the thread flags _after_ the delayed RCU-nocb kthread wake-up. Fixes: 47b8ff194c1f ("entry: Explicitly flush pending rcuog wakeup before last rescheduling point") Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230315194349.10798-3-joel@joelfernandes.org --- kernel/entry/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 1314894d2efa..be61332c66b5 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -192,13 +192,14 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, static void exit_to_user_mode_prepare(struct pt_regs *regs) { - unsigned long ti_work = read_thread_flags(); + unsigned long ti_work; lockdep_assert_irqs_disabled(); /* Flush pending rcuog wakeup before the last need_resched() check */ tick_nohz_user_enter_prepare(); + ti_work = read_thread_flags(); if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) ti_work = exit_to_user_mode_loop(regs, ti_work); -- cgit From 97fd768e501fd5d377cb0bf46a35bad2cd21c153 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Mar 2023 15:17:57 +0100 Subject: efi/libstub: zboot: Add compressed image to make targets Avoid needlessly rebuilding the compressed image by adding the file 'vmlinuz' to the 'targets' Kbuild make variable. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/Makefile.zboot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot index 43e9a4cab9f5..ccdd6a130d98 100644 --- a/drivers/firmware/efi/libstub/Makefile.zboot +++ b/drivers/firmware/efi/libstub/Makefile.zboot @@ -44,4 +44,4 @@ OBJCOPYFLAGS_vmlinuz.efi := -O binary $(obj)/vmlinuz.efi: $(obj)/vmlinuz.efi.elf FORCE $(call if_changed,objcopy) -targets += zboot-header.o vmlinuz.o vmlinuz.efi.elf vmlinuz.efi +targets += zboot-header.o vmlinuz vmlinuz.o vmlinuz.efi.elf vmlinuz.efi -- cgit From 2b91c4a870c9830eaf95e744454c9c218cccb736 Mon Sep 17 00:00:00 2001 From: Iwona Winiarska Date: Tue, 21 Mar 2023 10:04:10 +0100 Subject: hwmon: (peci/cputemp) Fix miscalculated DTS for SKX For Skylake, DTS temperature of the CPU is reported in S10.6 format instead of S8.8. Reported-by: Paul Fertser Link: https://lore.kernel.org/lkml/ZBhHS7v+98NK56is@home.paul.comp/ Signed-off-by: Iwona Winiarska Link: https://lore.kernel.org/r/20230321090410.866766-1-iwona.winiarska@intel.com Signed-off-by: Guenter Roeck --- drivers/hwmon/peci/cputemp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c index 30850a479f61..87d56f0fc888 100644 --- a/drivers/hwmon/peci/cputemp.c +++ b/drivers/hwmon/peci/cputemp.c @@ -537,6 +537,12 @@ static const struct cpu_info cpu_hsx = { .thermal_margin_to_millidegree = &dts_eight_dot_eight_to_millidegree, }; +static const struct cpu_info cpu_skx = { + .reg = &resolved_cores_reg_hsx, + .min_peci_revision = 0x33, + .thermal_margin_to_millidegree = &dts_ten_dot_six_to_millidegree, +}; + static const struct cpu_info cpu_icx = { .reg = &resolved_cores_reg_icx, .min_peci_revision = 0x40, @@ -558,7 +564,7 @@ static const struct auxiliary_device_id peci_cputemp_ids[] = { }, { .name = "peci_cpu.cputemp.skx", - .driver_data = (kernel_ulong_t)&cpu_hsx, + .driver_data = (kernel_ulong_t)&cpu_skx, }, { .name = "peci_cpu.cputemp.icx", -- cgit From 2315332efcbe7124252f080e03b57d3d2f1f4771 Mon Sep 17 00:00:00 2001 From: Phinex Hung Date: Tue, 21 Mar 2023 14:02:23 +0800 Subject: hwmon: fix potential sensor registration fail if of_node is missing It is not sufficient to check of_node in current device. In some cases, this would cause the sensor registration to fail. This patch looks for device's ancestors to find a valid of_node if any. Fixes: d560168b5d0f ("hwmon: (core) New hwmon registration API") Signed-off-by: Phinex Hung Link: https://lore.kernel.org/r/20230321060224.3819-1-phinex@realtek.com Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 33edb5c02f7d..d193ed3cb35e 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -757,6 +757,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, struct hwmon_device *hwdev; const char *label; struct device *hdev; + struct device *tdev = dev; int i, err, id; /* Complain about invalid characters in hwmon name attribute */ @@ -826,7 +827,9 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, hwdev->name = name; hdev->class = &hwmon_class; hdev->parent = dev; - hdev->of_node = dev ? dev->of_node : NULL; + while (tdev && !tdev->of_node) + tdev = tdev->parent; + hdev->of_node = tdev ? tdev->of_node : NULL; hwdev->chip = chip; dev_set_drvdata(hdev, drvdata); dev_set_name(hdev, HWMON_ID_FORMAT, id); @@ -838,7 +841,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, INIT_LIST_HEAD(&hwdev->tzdata); - if (dev && dev->of_node && chip && chip->ops->read && + if (hdev->of_node && chip && chip->ops->read && chip->info[0]->type == hwmon_chip && (chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) { err = hwmon_thermal_register_sensors(hdev); -- cgit From 813cc94c7847ae4a17e9f744fb4dbdf7df6bd732 Mon Sep 17 00:00:00 2001 From: Tianyi Jing Date: Sat, 18 Mar 2023 22:38:51 +0800 Subject: hwmon: (xgene) Fix ioremap and memremap leak Smatch reports: drivers/hwmon/xgene-hwmon.c:757 xgene_hwmon_probe() warn: 'ctx->pcc_comm_addr' from ioremap() not released on line: 757. This is because in drivers/hwmon/xgene-hwmon.c:701 xgene_hwmon_probe(), ioremap and memremap is not released, which may cause a leak. To fix this, ioremap and memremap is modified to devm_ioremap and devm_memremap. Signed-off-by: Tianyi Jing Reviewed-by: Dongliang Mu Link: https://lore.kernel.org/r/20230318143851.2191625-1-jingfelix@hust.edu.cn [groeck: Fixed formatting and subject] Signed-off-by: Guenter Roeck --- drivers/hwmon/xgene-hwmon.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index d1abea49f01b..78d9f52e2a71 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -698,14 +698,14 @@ static int xgene_hwmon_probe(struct platform_device *pdev) ctx->comm_base_addr = pcc_chan->shmem_base_addr; if (ctx->comm_base_addr) { if (version == XGENE_HWMON_V2) - ctx->pcc_comm_addr = (void __force *)ioremap( - ctx->comm_base_addr, - pcc_chan->shmem_size); + ctx->pcc_comm_addr = (void __force *)devm_ioremap(&pdev->dev, + ctx->comm_base_addr, + pcc_chan->shmem_size); else - ctx->pcc_comm_addr = memremap( - ctx->comm_base_addr, - pcc_chan->shmem_size, - MEMREMAP_WB); + ctx->pcc_comm_addr = devm_memremap(&pdev->dev, + ctx->comm_base_addr, + pcc_chan->shmem_size, + MEMREMAP_WB); } else { dev_err(&pdev->dev, "Failed to get PCC comm region\n"); rc = -ENODEV; -- cgit From b69245126a48e50882021180fa5d264dc7149ccc Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 15 Mar 2023 22:54:08 +0900 Subject: bootconfig: Fix testcase to increase max node Since commit 6c40624930c5 ("bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support") increased the max number of bootconfig node to 8192, the bootconfig testcase of the max number of nodes fails. To fix this issue, we can not simply increase the number in the test script because the test bootconfig file becomes too big (>32KB). To fix that, we can use a combination of three alphabets (26^3 = 17576). But with that, we can not express the 8193 (just one exceed from the limitation) because it also exceeds the max size of bootconfig. So, the first 26 nodes will just use one alphabet. With this fix, test-bootconfig.sh passes all tests. Link: https://lore.kernel.org/all/167888844790.791176.670805252426835131.stgit@devnote2/ Reported-by: Heinz Wiesinger Link: https://lore.kernel.org/all/2463802.XAFRqVoOGU@amaterasu.liwjatan.org Fixes: 6c40624930c5 ("bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support") Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) --- tools/bootconfig/test-bootconfig.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index f68e2e9eef8b..a2c484c243f5 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -87,10 +87,14 @@ xfail grep -i "error" $OUTFILE echo "Max node number check" -echo -n > $TEMPCONF -for i in `seq 1 1024` ; do - echo "node$i" >> $TEMPCONF -done +awk ' +BEGIN { + for (i = 0; i < 26; i += 1) + printf("%c\n", 65 + i % 26) + for (i = 26; i < 8192; i += 1) + printf("%c%c%c\n", 65 + i % 26, 65 + (i / 26) % 26, 65 + (i / 26 / 26)) +} +' > $TEMPCONF xpass $BOOTCONF -a $TEMPCONF $INITRD echo "badnode" >> $TEMPCONF -- cgit From 47f9e4c924025c5be87959d3335e66fcbb7f6b5c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Mar 2023 15:15:18 +0000 Subject: keys: Do not cache key in task struct if key is requested from kernel thread The key which gets cached in task structure from a kernel thread does not get invalidated even after expiry. Due to which, a new key request from kernel thread will be served with the cached key if it's present in task struct irrespective of the key validity. The change is to not cache key in task_struct when key requested from kernel thread so that kernel thread gets a valid key on every key request. The problem has been seen with the cifs module doing DNS lookups from a kernel thread and the results getting pinned by being attached to that kernel thread's cache - and thus not something that can be easily got rid of. The cache would ordinarily be cleared by notify-resume, but kernel threads don't do that. This isn't seen with AFS because AFS is doing request_key() within the kernel half of a user thread - which will do notify-resume. Fixes: 7743c48e54ee ("keys: Cache result of request_key*() temporarily in task_struct") Signed-off-by: Bharath SM Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen cc: Shyam Prasad N cc: Steve French cc: keyrings@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/CAGypqWw951d=zYRbdgNR4snUDvJhWL=q3=WOyh7HhSJupjz2vA@mail.gmail.com/ --- security/keys/request_key.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 2da4404276f0..07a0ef2baacd 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -38,9 +38,12 @@ static void cache_requested_key(struct key *key) #ifdef CONFIG_KEYS_REQUEST_CACHE struct task_struct *t = current; - key_put(t->cached_requested_key); - t->cached_requested_key = key_get(key); - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + /* Do not cache key if it is a kernel thread */ + if (!(t->flags & PF_KTHREAD)) { + key_put(t->cached_requested_key); + t->cached_requested_key = key_get(key); + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + } #endif } -- cgit From 4fc5c74dde69a7eda172514aaeb5a7df3600adb3 Mon Sep 17 00:00:00 2001 From: Robbie Harwood Date: Mon, 20 Feb 2023 12:12:53 -0500 Subject: verify_pefile: relax wrapper length check The PE Format Specification (section "The Attribute Certificate Table (Image Only)") states that `dwLength` is to be rounded up to 8-byte alignment when used for traversal. Therefore, the field is not required to be an 8-byte multiple in the first place. Accordingly, pesign has not performed this alignment since version 0.110. This causes kexec failure on pesign'd binaries with "PEFILE: Signature wrapper len wrong". Update the comment and relax the check. Signed-off-by: Robbie Harwood Signed-off-by: David Howells cc: Jarkko Sakkinen cc: Eric Biederman cc: Herbert Xu cc: keyrings@vger.kernel.org cc: linux-crypto@vger.kernel.org cc: kexec@lists.infradead.org Link: https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-attribute-certificate-table-image-only Link: https://github.com/rhboot/pesign Link: https://lore.kernel.org/r/20230220171254.592347-2-rharwood@redhat.com/ # v2 --- crypto/asymmetric_keys/verify_pefile.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c index 7553ab18db89..fe1bb374239d 100644 --- a/crypto/asymmetric_keys/verify_pefile.c +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -135,11 +135,15 @@ static int pefile_strip_sig_wrapper(const void *pebuf, pr_debug("sig wrapper = { %x, %x, %x }\n", wrapper.length, wrapper.revision, wrapper.cert_type); - /* Both pesign and sbsign round up the length of certificate table - * (in optional header data directories) to 8 byte alignment. + /* sbsign rounds up the length of certificate table (in optional + * header data directories) to 8 byte alignment. However, the PE + * specification states that while entries are 8-byte aligned, this is + * not included in their length, and as a result, pesign has not + * rounded up since 0.110. */ - if (round_up(wrapper.length, 8) != ctx->sig_len) { - pr_debug("Signature wrapper len wrong\n"); + if (wrapper.length > ctx->sig_len) { + pr_debug("Signature wrapper bigger than sig len (%x > %x)\n", + ctx->sig_len, wrapper.length); return -ELIBBAD; } if (wrapper.revision != WIN_CERT_REVISION_2_0) { -- cgit From 3584c1dbfffdabf8e3dc1dd25748bb38dd01cd43 Mon Sep 17 00:00:00 2001 From: Robbie Harwood Date: Mon, 20 Feb 2023 12:12:54 -0500 Subject: asymmetric_keys: log on fatal failures in PE/pkcs7 These particular errors can be encountered while trying to kexec when secureboot lockdown is in place. Without this change, even with a signed debug build, one still needs to reboot the machine to add the appropriate dyndbg parameters (since lockdown blocks debugfs). Accordingly, upgrade all pr_debug() before fatal error into pr_warn(). Signed-off-by: Robbie Harwood Signed-off-by: David Howells cc: Jarkko Sakkinen cc: Eric Biederman cc: Herbert Xu cc: keyrings@vger.kernel.org cc: linux-crypto@vger.kernel.org cc: kexec@lists.infradead.org Link: https://lore.kernel.org/r/20230220171254.592347-3-rharwood@redhat.com/ # v2 --- crypto/asymmetric_keys/pkcs7_verify.c | 10 +++++----- crypto/asymmetric_keys/verify_pefile.c | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 4fa769c4bcdb..f0d4ff3c20a8 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -79,16 +79,16 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7, } if (sinfo->msgdigest_len != sig->digest_size) { - pr_debug("Sig %u: Invalid digest size (%u)\n", - sinfo->index, sinfo->msgdigest_len); + pr_warn("Sig %u: Invalid digest size (%u)\n", + sinfo->index, sinfo->msgdigest_len); ret = -EBADMSG; goto error; } if (memcmp(sig->digest, sinfo->msgdigest, sinfo->msgdigest_len) != 0) { - pr_debug("Sig %u: Message digest doesn't match\n", - sinfo->index); + pr_warn("Sig %u: Message digest doesn't match\n", + sinfo->index); ret = -EKEYREJECTED; goto error; } @@ -478,7 +478,7 @@ int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7, const void *data, size_t datalen) { if (pkcs7->data) { - pr_debug("Data already supplied\n"); + pr_warn("Data already supplied\n"); return -EINVAL; } pkcs7->data = data; diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c index fe1bb374239d..22beaf2213a2 100644 --- a/crypto/asymmetric_keys/verify_pefile.c +++ b/crypto/asymmetric_keys/verify_pefile.c @@ -74,7 +74,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen, break; default: - pr_debug("Unknown PEOPT magic = %04hx\n", pe32->magic); + pr_warn("Unknown PEOPT magic = %04hx\n", pe32->magic); return -ELIBBAD; } @@ -95,7 +95,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen, ctx->certs_size = ddir->certs.size; if (!ddir->certs.virtual_address || !ddir->certs.size) { - pr_debug("Unsigned PE binary\n"); + pr_warn("Unsigned PE binary\n"); return -ENODATA; } @@ -127,7 +127,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf, unsigned len; if (ctx->sig_len < sizeof(wrapper)) { - pr_debug("Signature wrapper too short\n"); + pr_warn("Signature wrapper too short\n"); return -ELIBBAD; } @@ -142,16 +142,16 @@ static int pefile_strip_sig_wrapper(const void *pebuf, * rounded up since 0.110. */ if (wrapper.length > ctx->sig_len) { - pr_debug("Signature wrapper bigger than sig len (%x > %x)\n", - ctx->sig_len, wrapper.length); + pr_warn("Signature wrapper bigger than sig len (%x > %x)\n", + ctx->sig_len, wrapper.length); return -ELIBBAD; } if (wrapper.revision != WIN_CERT_REVISION_2_0) { - pr_debug("Signature is not revision 2.0\n"); + pr_warn("Signature is not revision 2.0\n"); return -ENOTSUPP; } if (wrapper.cert_type != WIN_CERT_TYPE_PKCS_SIGNED_DATA) { - pr_debug("Signature certificate type is not PKCS\n"); + pr_warn("Signature certificate type is not PKCS\n"); return -ENOTSUPP; } @@ -164,7 +164,7 @@ static int pefile_strip_sig_wrapper(const void *pebuf, ctx->sig_offset += sizeof(wrapper); ctx->sig_len -= sizeof(wrapper); if (ctx->sig_len < 4) { - pr_debug("Signature data missing\n"); + pr_warn("Signature data missing\n"); return -EKEYREJECTED; } @@ -198,7 +198,7 @@ check_len: return 0; } not_pkcs7: - pr_debug("Signature data not PKCS#7\n"); + pr_warn("Signature data not PKCS#7\n"); return -ELIBBAD; } @@ -341,8 +341,8 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen, digest_size = crypto_shash_digestsize(tfm); if (digest_size != ctx->digest_len) { - pr_debug("Digest size mismatch (%zx != %x)\n", - digest_size, ctx->digest_len); + pr_warn("Digest size mismatch (%zx != %x)\n", + digest_size, ctx->digest_len); ret = -EBADMSG; goto error_no_desc; } @@ -373,7 +373,7 @@ static int pefile_digest_pe(const void *pebuf, unsigned int pelen, * PKCS#7 certificate. */ if (memcmp(digest, ctx->digest, ctx->digest_len) != 0) { - pr_debug("Digest mismatch\n"); + pr_warn("Digest mismatch\n"); ret = -EKEYREJECTED; } else { pr_debug("The digests match!\n"); -- cgit From 387d42ae6df76d2ae813432d05630535a5480038 Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Thu, 9 Mar 2023 13:38:56 -0800 Subject: ice: fix rx buffers handling for flow director packets Adding flow director filters stopped working correctly after commit 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side"). As a result, only first flow director filter can be added, adding next filter leads to NULL pointer dereference attached below. Rx buffer handling and reallocation logic has been optimized, however flow director specific traffic was not accounted for. As a result driver handled those packets incorrectly since new logic was based on ice_rx_ring::first_desc which was not set in this case. Fix this by setting struct ice_rx_ring::first_desc to next_to_clean for flow director received packets. [ 438.544867] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 438.551840] #PF: supervisor read access in kernel mode [ 438.556978] #PF: error_code(0x0000) - not-present page [ 438.562115] PGD 7c953b2067 P4D 0 [ 438.565436] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 438.569794] CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Not tainted 6.2.0-net-bug #1 [ 438.577531] Hardware name: Intel Corporation M50CYP2SBSTD/M50CYP2SBSTD, BIOS SE5C620.86B.01.01.0005.2202160810 02/16/2022 [ 438.588470] RIP: 0010:ice_clean_rx_irq+0x2b9/0xf20 [ice] [ 438.593860] Code: 45 89 f7 e9 ac 00 00 00 8b 4d 78 41 31 4e 10 41 09 d5 4d 85 f6 0f 84 82 00 00 00 49 8b 4e 08 41 8b 76 1c 65 8b 3d 47 36 4a 3f <48> 8b 11 48 c1 ea 36 39 d7 0f 85 a6 00 00 00 f6 41 08 02 0f 85 9c [ 438.612605] RSP: 0018:ff8c732640003ec8 EFLAGS: 00010082 [ 438.617831] RAX: 0000000000000800 RBX: 00000000000007ff RCX: 0000000000000000 [ 438.624957] RDX: 0000000000000800 RSI: 0000000000000000 RDI: 0000000000000000 [ 438.632089] RBP: ff4ed275a2158200 R08: 00000000ffffffff R09: 0000000000000020 [ 438.639222] R10: 0000000000000000 R11: 0000000000000020 R12: 0000000000001000 [ 438.646356] R13: 0000000000000000 R14: ff4ed275d0daffe0 R15: 0000000000000000 [ 438.653485] FS: 0000000000000000(0000) GS:ff4ed2738fa00000(0000) knlGS:0000000000000000 [ 438.661563] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 438.667310] CR2: 0000000000000000 CR3: 0000007c9f0d6006 CR4: 0000000000771ef0 [ 438.674444] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 438.681573] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 438.688697] PKRU: 55555554 [ 438.691404] Call Trace: [ 438.693857] [ 438.695877] ? profile_tick+0x17/0x80 [ 438.699542] ice_msix_clean_ctrl_vsi+0x24/0x50 [ice] [ 438.702571] ice 0000:b1:00.0: VF 1: ctrl_vsi irq timeout [ 438.704542] __handle_irq_event_percpu+0x43/0x1a0 [ 438.704549] handle_irq_event+0x34/0x70 [ 438.704554] handle_edge_irq+0x9f/0x240 [ 438.709901] iavf 0000:b1:01.1: Failed to add Flow Director filter with status: 6 [ 438.714571] __common_interrupt+0x63/0x100 [ 438.714580] common_interrupt+0xb4/0xd0 [ 438.718424] iavf 0000:b1:01.1: Rule ID: 127 dst_ip: 0.0.0.0 src_ip 0.0.0.0 UDP: dst_port 4 src_port 0 [ 438.722255] [ 438.722257] [ 438.722257] asm_common_interrupt+0x22/0x40 [ 438.722262] RIP: 0010:cpuidle_enter_state+0xc8/0x430 [ 438.722267] Code: 6e e9 25 ff e8 f9 ef ff ff 8b 53 04 49 89 c5 0f 1f 44 00 00 31 ff e8 d7 f1 24 ff 45 84 ff 0f 85 57 02 00 00 fb 0f 1f 44 00 00 <45> 85 f6 0f 88 85 01 00 00 49 63 d6 48 8d 04 52 48 8d 04 82 49 8d [ 438.722269] RSP: 0018:ffffffff86003e50 EFLAGS: 00000246 [ 438.784108] RAX: ff4ed2738fa00000 RBX: ffbe72a64fc01020 RCX: 0000000000000000 [ 438.791234] RDX: 0000000000000000 RSI: ffffffff858d84de RDI: ffffffff85893641 [ 438.798365] RBP: 0000000000000002 R08: 0000000000000002 R09: 000000003158af9d [ 438.805490] R10: 0000000000000008 R11: 0000000000000354 R12: ffffffff862365a0 [ 438.812622] R13: 000000661b472a87 R14: 0000000000000002 R15: 0000000000000000 [ 438.819757] cpuidle_enter+0x29/0x40 [ 438.823333] do_idle+0x1b6/0x230 [ 438.826566] cpu_startup_entry+0x19/0x20 [ 438.830492] rest_init+0xcb/0xd0 [ 438.833717] arch_call_rest_init+0xa/0x30 [ 438.837731] start_kernel+0x776/0xb70 [ 438.841396] secondary_startup_64_no_verify+0xe5/0xeb [ 438.846449] Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Signed-off-by: Piotr Raczynski Acked-by: Maciej Fijalkowski Reviewed-by: Simon Horman Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dfd22862e926..b61dd9f01540 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1210,6 +1210,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); if (++ntc == cnt) ntc = 0; + rx_ring->first_desc = ntc; continue; } -- cgit From 83b49e7f63da88a1544cba2b2e40bfabb24bd203 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 10 Mar 2023 12:33:44 +0100 Subject: ice: check if VF exists before mode check Setting trust on VF should return EINVAL when there is no VF. Move checking for switchdev mode after checking if VF exists. Fixes: c54d209c78b8 ("ice: Wait for VF to be reset/ready before configuration") Signed-off-by: Michal Swiatkowski Signed-off-by: Kalyan Kodamagula Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 96a64c25e2ef..0cc05e54a781 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1341,15 +1341,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) struct ice_vf *vf; int ret; + vf = ice_get_vf_by_id(pf, vf_id); + if (!vf) + return -EINVAL; + if (ice_is_eswitch_mode_switchdev(pf)) { dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); return -EOPNOTSUPP; } - vf = ice_get_vf_by_id(pf, vf_id); - if (!vf) - return -EINVAL; - ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; -- cgit From 7d46c0e670d5f646879b52bacc387bf48ff0e7f1 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 13 Mar 2023 13:09:15 +0100 Subject: ice: remove filters only if VSI is deleted Filters shouldn't be removed in VSI rebuild path. Removing them on PF VSI results in no rule for PF MAC after changing for example queues amount. Remove all filters only in the VSI remove flow. As unload should also cause the filter to be removed introduce, a new function ice_stop_eth(). It will unroll ice_start_eth(), so remove filters and close VSI. Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Signed-off-by: Michal Swiatkowski Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 0f52ea38b6f3..450317dfcca7 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -291,6 +291,7 @@ static void ice_vsi_delete_from_hw(struct ice_vsi *vsi) struct ice_vsi_ctx *ctxt; int status; + ice_fltr_remove_all(vsi); ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (!ctxt) return; @@ -2892,7 +2893,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) ice_cfg_sw_lldp(vsi, false, false); - ice_fltr_remove_all(vsi); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); if (err) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c233464b8f6b..0d8b8c6f9bd3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4641,6 +4641,12 @@ static int ice_start_eth(struct ice_vsi *vsi) return err; } +static void ice_stop_eth(struct ice_vsi *vsi) +{ + ice_fltr_remove_all(vsi); + ice_vsi_close(vsi); +} + static int ice_init_eth(struct ice_pf *pf) { struct ice_vsi *vsi = ice_get_main_vsi(pf); @@ -5129,7 +5135,7 @@ void ice_unload(struct ice_pf *pf) { ice_deinit_features(pf); ice_deinit_rdma(pf); - ice_vsi_close(ice_get_main_vsi(pf)); + ice_stop_eth(ice_get_main_vsi(pf)); ice_vsi_decfg(ice_get_main_vsi(pf)); ice_deinit_dev(pf); } -- cgit From 4e264be98b88a6d6f476c11087fe865696e8bef5 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 13 Mar 2023 17:06:45 +0100 Subject: iavf: fix hang on reboot with ice When a system with E810 with existing VFs gets rebooted the following hang may be observed. Pid 1 is hung in iavf_remove(), part of a network driver: PID: 1 TASK: ffff965400e5a340 CPU: 24 COMMAND: "systemd-shutdow" #0 [ffffaad04005fa50] __schedule at ffffffff8b3239cb #1 [ffffaad04005fae8] schedule at ffffffff8b323e2d #2 [ffffaad04005fb00] schedule_hrtimeout_range_clock at ffffffff8b32cebc #3 [ffffaad04005fb80] usleep_range_state at ffffffff8b32c930 #4 [ffffaad04005fbb0] iavf_remove at ffffffffc12b9b4c [iavf] #5 [ffffaad04005fbf0] pci_device_remove at ffffffff8add7513 #6 [ffffaad04005fc10] device_release_driver_internal at ffffffff8af08baa #7 [ffffaad04005fc40] pci_stop_bus_device at ffffffff8adcc5fc #8 [ffffaad04005fc60] pci_stop_and_remove_bus_device at ffffffff8adcc81e #9 [ffffaad04005fc70] pci_iov_remove_virtfn at ffffffff8adf9429 #10 [ffffaad04005fca8] sriov_disable at ffffffff8adf98e4 #11 [ffffaad04005fcc8] ice_free_vfs at ffffffffc04bb2c8 [ice] #12 [ffffaad04005fd10] ice_remove at ffffffffc04778fe [ice] #13 [ffffaad04005fd38] ice_shutdown at ffffffffc0477946 [ice] #14 [ffffaad04005fd50] pci_device_shutdown at ffffffff8add58f1 #15 [ffffaad04005fd70] device_shutdown at ffffffff8af05386 #16 [ffffaad04005fd98] kernel_restart at ffffffff8a92a870 #17 [ffffaad04005fda8] __do_sys_reboot at ffffffff8a92abd6 #18 [ffffaad04005fee0] do_syscall_64 at ffffffff8b317159 #19 [ffffaad04005ff08] __context_tracking_enter at ffffffff8b31b6fc #20 [ffffaad04005ff18] syscall_exit_to_user_mode at ffffffff8b31b50d #21 [ffffaad04005ff28] do_syscall_64 at ffffffff8b317169 #22 [ffffaad04005ff50] entry_SYSCALL_64_after_hwframe at ffffffff8b40009b RIP: 00007f1baa5c13d7 RSP: 00007fffbcc55a98 RFLAGS: 00000202 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1baa5c13d7 RDX: 0000000001234567 RSI: 0000000028121969 RDI: 00000000fee1dead RBP: 00007fffbcc55ca0 R8: 0000000000000000 R9: 00007fffbcc54e90 R10: 00007fffbcc55050 R11: 0000000000000202 R12: 0000000000000005 R13: 0000000000000000 R14: 00007fffbcc55af0 R15: 0000000000000000 ORIG_RAX: 00000000000000a9 CS: 0033 SS: 002b During reboot all drivers PM shutdown callbacks are invoked. In iavf_shutdown() the adapter state is changed to __IAVF_REMOVE. In ice_shutdown() the call chain above is executed, which at some point calls iavf_remove(). However iavf_remove() expects the VF to be in one of the states __IAVF_RUNNING, __IAVF_DOWN or __IAVF_INIT_FAILED. If that's not the case it sleeps forever. So if iavf_shutdown() gets invoked before iavf_remove() the system will hang indefinitely because the adapter is already in state __IAVF_REMOVE. Fix this by returning from iavf_remove() if the state is __IAVF_REMOVE, as we already went through iavf_shutdown(). Fixes: 974578017fc1 ("iavf: Add waiting so the port is initialized in remove") Fixes: a8417330f8a5 ("iavf: Fix race condition between iavf_shutdown and iavf_remove") Reported-by: Marius Cornea Signed-off-by: Stefan Assmann Reviewed-by: Michal Kubiak Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 327cd9b1af2c..095201e83c9d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5074,6 +5074,11 @@ static void iavf_remove(struct pci_dev *pdev) mutex_unlock(&adapter->crit_lock); break; } + /* Simply return if we already went through iavf_shutdown */ + if (adapter->state == __IAVF_REMOVE) { + mutex_unlock(&adapter->crit_lock); + return; + } mutex_unlock(&adapter->crit_lock); usleep_range(500, 1000); -- cgit From c672297bbc0e86dbf88396b8053e2fbb173f16ff Mon Sep 17 00:00:00 2001 From: Radoslaw Tyl Date: Mon, 13 Mar 2023 15:07:33 +0100 Subject: i40e: fix flow director packet filter programming Initialize to zero structures to build a valid Tx Packet used for the filter programming. Fixes: a9219b332f52 ("i40e: VLAN field for flow director") Signed-off-by: Radoslaw Tyl Reviewed-by: Michal Swiatkowski Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 924f972b91fa..72b091f2509d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -171,10 +171,10 @@ static char *i40e_create_dummy_packet(u8 *dummy_packet, bool ipv4, u8 l4proto, struct i40e_fdir_filter *data) { bool is_vlan = !!data->vlan_tag; - struct vlan_hdr vlan; - struct ipv6hdr ipv6; - struct ethhdr eth; - struct iphdr ip; + struct vlan_hdr vlan = {}; + struct ipv6hdr ipv6 = {}; + struct ethhdr eth = {}; + struct iphdr ip = {}; u8 *tmp; if (ipv4) { -- cgit From 10ec8ca8ec1a2f04c4ed90897225231c58c124a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 20 Mar 2023 15:37:25 +0100 Subject: bpf: Adjust insufficient default bpf_jit_limit We've seen recent AWS EKS (Kubernetes) user reports like the following: After upgrading EKS nodes from v20230203 to v20230217 on our 1.24 EKS clusters after a few days a number of the nodes have containers stuck in ContainerCreating state or liveness/readiness probes reporting the following error: Readiness probe errored: rpc error: code = Unknown desc = failed to exec in container: failed to start exec "4a11039f730203ffc003b7[...]": OCI runtime exec failed: exec failed: unable to start container process: unable to init seccomp: error loading seccomp filter into kernel: error loading seccomp filter: errno 524: unknown However, we had not been seeing this issue on previous AMIs and it only started to occur on v20230217 (following the upgrade from kernel 5.4 to 5.10) with no other changes to the underlying cluster or workloads. We tried the suggestions from that issue (sysctl net.core.bpf_jit_limit=452534528) which helped to immediately allow containers to be created and probes to execute but after approximately a day the issue returned and the value returned by cat /proc/vmallocinfo | grep bpf_jit | awk '{s+=$2} END {print s}' was steadily increasing. I tested bpf tree to observe bpf_jit_charge_modmem, bpf_jit_uncharge_modmem their sizes passed in as well as bpf_jit_current under tcpdump BPF filter, seccomp BPF and native (e)BPF programs, and the behavior all looks sane and expected, that is nothing "leaking" from an upstream perspective. The bpf_jit_limit knob was originally added in order to avoid a situation where unprivileged applications loading BPF programs (e.g. seccomp BPF policies) consuming all the module memory space via BPF JIT such that loading of kernel modules would be prevented. The default limit was defined back in 2018 and while good enough back then, we are generally seeing far more BPF consumers today. Adjust the limit for the BPF JIT pool from originally 1/4 to now 1/2 of the module memory space to better reflect today's needs and avoid more users running into potentially hard to debug issues. Fixes: fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") Reported-by: Stephen Haynes Reported-by: Lefteris Alexakis Signed-off-by: Daniel Borkmann Link: https://github.com/awslabs/amazon-eks-ami/issues/1179 Link: https://github.com/awslabs/amazon-eks-ami/issues/1219 Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230320143725.8394-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b297e9f60ca1..e2d256c82072 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -972,7 +972,7 @@ static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); - bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1, PAGE_SIZE), LONG_MAX); return 0; } -- cgit From c83172b0639c8a005c0dd3b36252dc22ddd9f19c Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Fri, 25 Nov 2022 04:15:40 +0200 Subject: net/mlx5e: Set uplink rep as NETNS_LOCAL Previously, NETNS_LOCAL was not set for uplink representors, inconsistent with VF representors, and allowed the uplink representor to be moved between net namespaces and separated from the VF representors it shares the core device with. Such usage would break the isolation model of namespaces, as devices in different namespaces would have access to shared memory. To solve this issue, set NETNS_LOCAL for uplink representors if eswitch is in switchdev mode. Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a7f2ab22cc40..7ca7e9b57607 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4150,8 +4150,12 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } - if (mlx5e_is_uplink_rep(priv)) + if (mlx5e_is_uplink_rep(priv)) { features = mlx5e_fix_uplink_rep_features(netdev, features); + features |= NETIF_F_NETNS_LOCAL; + } else { + features &= ~NETIF_F_NETNS_LOCAL; + } mutex_unlock(&priv->state_lock); -- cgit From 662404b24a4c4d839839ed25e3097571f5938b9b Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 9 Feb 2023 12:48:52 +0200 Subject: net/mlx5e: Block entering switchdev mode with ns inconsistency Upon entering switchdev mode, VF/SF representors are spawned in the devlink instance's net namespace, whereas the PF net device transforms into the uplink representor, remaining in the net namespace the PF net device was in. Therefore, if a PF net device's namespace is different from its parent devlink net namespace, entering switchdev mode can create an illegal situation where all representors sharing the same core device are NOT in the same net namespace. To avoid this issue, block entering switchdev mode for devices whose child netdev net namespace has diverged from the parent devlink's. Fixes: 7768d1971de6 ("net/mlx5: E-Switch, Add control for encapsulation") Signed-off-by: Gavin Li Reviewed-by: Gavi Teitz Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 22075943bb58..25a8076a77bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3405,6 +3405,18 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } +static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) +{ + struct net *devl_net, *netdev_net; + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + netdev_net = dev_net(esw->dev->mlx5e_res.uplink_netdev); + devl_net = devlink_net(devlink); + + return net_eq(devl_net, netdev_net); +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -3419,6 +3431,13 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && + !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); + return -EPERM; + } + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { -- cgit From 922f56e9a795d6f3dd72d3428ebdd7ee040fa855 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 31 Jan 2023 14:07:03 +0200 Subject: net/mlx5: Fix steering rules cleanup vport's mc, uc and multicast rules are not deleted in teardown path when EEH happens. Since the vport's promisc settings(uc, mc and all) in firmware are reset after EEH, mlx5 driver will try to delete the above rules in the initialization path. This cause kernel crash because these software rules are no longer valid. Fix by nullifying these rules right after delete to avoid accessing any dangling pointers. Call Trace: __list_del_entry_valid+0xcc/0x100 (unreliable) tree_put_node+0xf4/0x1b0 [mlx5_core] tree_remove_node+0x30/0x70 [mlx5_core] mlx5_del_flow_rules+0x14c/0x1f0 [mlx5_core] esw_apply_vport_rx_mode+0x10c/0x200 [mlx5_core] esw_update_vport_rx_mode+0xb4/0x180 [mlx5_core] esw_vport_change_handle_locked+0x1ec/0x230 [mlx5_core] esw_enable_vport+0x130/0x260 [mlx5_core] mlx5_eswitch_enable_sriov+0x2a0/0x2f0 [mlx5_core] mlx5_device_enable_sriov+0x74/0x440 [mlx5_core] mlx5_load_one+0x114c/0x1550 [mlx5_core] mlx5_pci_resume+0x68/0xf0 [mlx5_core] eeh_report_resume+0x1a4/0x230 eeh_pe_dev_traverse+0x98/0x170 eeh_handle_normal_event+0x3e4/0x640 eeh_handle_event+0x4c/0x370 eeh_event_handler+0x14c/0x210 kthread+0x168/0x1b0 ret_from_kernel_thread+0x5c/0x84 Fixes: a35f71f27a61 ("net/mlx5: E-Switch, Implement promiscuous rx modes vf request handling") Signed-off-by: Huy Nguyen Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0f052513fefa..8bdf28762f41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -959,6 +959,7 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; + esw_apply_vport_rx_mode(esw, vport, false, false); esw_vport_cleanup(esw, vport); esw->enabled_vports--; -- cgit From 6e9d51b1a5cb8d750c3daf89db4f4cdfd1051819 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 1 Mar 2023 15:47:11 +0200 Subject: net/mlx5e: Initialize link speed to zero mlx5e_port_max_linkspeed does not guarantee value assignment for speed. Avoid cases where link_speed might be used uninitialized. In case mlx5e_port_max_linkspeed fails, a default link speed of 50000 will be used for the calculations. Fixes: 3f6d08d196b2 ("net/mlx5e: Add RSS support for hairpin") Signed-off-by: Roy Novich Reviewed-by: Tariq Toukan Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6bfed633343a..87a2850b32d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1103,8 +1103,8 @@ static void mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, struct mlx5_core_dev *mdev) { + u32 link_speed = 0; u64 link_speed64; - u32 link_speed; hairpin_params->mdev = mdev; /* set hairpin pair per each 50Gbs share of the link */ -- cgit From 7e3fce82d945cf6e7f99034b113ff2d250d7524d Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 20 Mar 2023 13:13:55 +0200 Subject: net/mlx5e: Overcome slow response for first macsec ASO WQE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First ASO WQE poll causes a cache miss in hardware hence the resut is delayed. It causes to the situation where such WQE is polled earlier than it is needed. Add logic to retry ASO CQ polling operation. Fixes: 739cfa34518e ("net/mlx5: Make ASO poll CQ usable in atomic context")  Signed-off-by: Emeel Hakim Reviewed-by: Leon Romanovsky Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 8af53178e40d..33b3620ea45c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1412,6 +1412,7 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac struct mlx5e_macsec_aso *aso; struct mlx5_aso_wqe *aso_wqe; struct mlx5_aso *maso; + unsigned long expires; int err; aso = &macsec->aso; @@ -1425,7 +1426,13 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); - err = mlx5_aso_poll_cq(maso, false); + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(maso, false); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); + if (err) goto err_out; -- cgit From 44d553188c38ac74b799dfdcebafef2f7bb70942 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 15 Mar 2023 11:04:38 +0200 Subject: net/mlx5: Read the TC mapping of all priorities on ETS query When ETS configurations are queried by the user to get the mapping assignment between packet priority and traffic class, only priorities up to maximum TCs are queried from QTCT register in FW to retrieve their assigned TC, leaving the rest of the priorities mapped to the default TC #0 which might be misleading. Fix by querying the TC mapping of all priorities on each ETS query, regardless of the maximum number of TCs configured in FW. Fixes: 820c2c5e773d ("net/mlx5e: Read ETS settings directly from firmware") Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 2449731b7d79..89de92d06483 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -117,12 +117,14 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, ets)) return -EOPNOTSUPP; - ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; - for (i = 0; i < ets->ets_cap; i++) { + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); if (err) return err; + } + ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; + for (i = 0; i < ets->ets_cap; i++) { err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); if (err) return err; -- cgit From 640fcdbcf27fc62de9223f958ceb4e897a00e791 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Feb 2023 14:16:10 +0300 Subject: net/mlx5: E-Switch, Fix an Oops in error handling code The error handling dereferences "vport". There is nothing we can do if it is an error pointer except returning the error code. Fixes: 133dcfc577ea ("net/mlx5: E-Switch, Alloc and free unique metadata for match") Signed-off-by: Dan Carpenter Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index d55775627a47..50d2ea323979 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -364,8 +364,7 @@ int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vpo if (WARN_ON_ONCE(IS_ERR(vport))) { esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num); - err = PTR_ERR(vport); - goto out; + return PTR_ERR(vport); } esw_acl_ingress_ofld_rules_destroy(esw, vport); -- cgit From 9a801afd3eb95e1a89aba17321062df06fb49d98 Mon Sep 17 00:00:00 2001 From: Dylan Jhong Date: Mon, 13 Mar 2023 11:49:06 +0800 Subject: riscv: mm: Fix incorrect ASID argument when flushing TLB Currently, we pass the CONTEXTID instead of the ASID to the TLB flush function. We should only take the ASID field to prevent from touching the reserved bit field. Fixes: 3f1e782998cd ("riscv: add ASID-based tlbflushing methods") Signed-off-by: Dylan Jhong Reviewed-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20230313034906.2401730-1-dylan@andestech.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 2 ++ arch/riscv/mm/context.c | 2 +- arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 801019381dea..a09196f8de68 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -12,6 +12,8 @@ #include #ifdef CONFIG_MMU +extern unsigned long asid_mask; + static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 0f784e3d307b..12e22e7330e7 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -22,7 +22,7 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; -static unsigned long asid_mask; +unsigned long asid_mask; static atomic_long_t current_version; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 37ed760d007c..ef701fa83f36 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -42,7 +42,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, /* check if the tlbflush needs to be sent to other CPUs */ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; if (static_branch_unlikely(&use_asid_allocator)) { - unsigned long asid = atomic_long_read(&mm->context.id); + unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; if (broadcast) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); -- cgit From 032a954061afd4b7426c3eb6bfd2952ef1e9a384 Mon Sep 17 00:00:00 2001 From: Álvaro Fernández Rojas Date: Sun, 19 Mar 2023 10:55:40 +0100 Subject: net: dsa: tag_brcm: legacy: fix daisy-chained switches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When BCM63xx internal switches are connected to switches with a 4-byte Broadcom tag, it does not identify the packet as VLAN tagged, so it adds one based on its PVID (which is likely 0). Right now, the packet is received by the BCM63xx internal switch and the 6-byte tag is properly processed. The next step would to decode the corresponding 4-byte tag. However, the internal switch adds an invalid VLAN tag after the 6-byte tag and the 4-byte tag handling fails. In order to fix this we need to remove the invalid VLAN tag after the 6-byte tag before passing it to the 4-byte tag decoding. Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Michal Swiatkowski Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230319095540.239064-1-noltari@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_brcm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 10239daa5745..cacdafb41200 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -252,6 +253,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, struct net_device *dev) { + int len = BRCM_LEG_TAG_LEN; int source_port; u8 *brcm_tag; @@ -266,12 +268,16 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; + /* VLAN tag is added by BCM63xx internal switch */ + if (netdev_uses_dsa(skb->dev)) + len += VLAN_HLEN; + /* Remove Broadcom tag and update checksum */ - skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN); + skb_pull_rcsum(skb, len); dsa_default_offload_fwd_mark(skb); - dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN); + dsa_strip_etype_header(skb, len); return skb; } -- cgit From 968b66ffeb7956acc72836a7797aeb7b2444ec51 Mon Sep 17 00:00:00 2001 From: Frank Crawford Date: Sat, 18 Mar 2023 19:05:42 +1100 Subject: hwmon (it87): Fix voltage scaling for chips with 10.9mV ADCs Fix voltage scaling for chips that have 10.9mV ADCs, where scaling was not performed. Fixes: ead8080351c9 ("hwmon: (it87) Add support for IT8732F") Signed-off-by: Frank Crawford Link: https://lore.kernel.org/r/20230318080543.1226700-2-frank@crawford.emu.id.au [groeck: Update subject and description to focus on bug fix] Signed-off-by: Guenter Roeck --- drivers/hwmon/it87.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 66f7ceaa7c3f..e9614eb557d4 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -515,6 +515,8 @@ static const struct it87_devices it87_devices[] = { #define has_six_temp(data) ((data)->features & FEAT_SIX_TEMP) #define has_vin3_5v(data) ((data)->features & FEAT_VIN3_5V) #define has_conf_noexit(data) ((data)->features & FEAT_CONF_NOEXIT) +#define has_scaling(data) ((data)->features & (FEAT_12MV_ADC | \ + FEAT_10_9MV_ADC)) struct it87_sio_data { int sioaddr; @@ -3134,7 +3136,7 @@ static int it87_probe(struct platform_device *pdev) "Detected broken BIOS defaults, disabling PWM interface\n"); /* Starting with IT8721F, we handle scaling of internal voltages */ - if (has_12mv_adc(data)) { + if (has_scaling(data)) { if (sio_data->internal & BIT(0)) data->in_scaled |= BIT(3); /* in3 is AVCC */ if (sio_data->internal & BIT(1)) -- cgit From 4fe3c88552a3fbe1944426a4506a18cdeb457b5a Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Mon, 20 Mar 2023 14:33:18 +0000 Subject: atm: idt77252: fix kmemleak when rmmod idt77252 There are memory leaks reported by kmemleak: unreferenced object 0xffff888106500800 (size 128): comm "modprobe", pid 1017, jiffies 4297787785 (age 67.152s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000970ce626>] __kmem_cache_alloc_node+0x20c/0x380 [<00000000fb5f78d9>] kmalloc_trace+0x2f/0xb0 [<000000000e947e2a>] idt77252_init_one+0x2847/0x3c90 [idt77252] [<000000006efb048e>] local_pci_probe+0xeb/0x1a0 ... unreferenced object 0xffff888106500b00 (size 128): comm "modprobe", pid 1017, jiffies 4297787785 (age 67.152s) hex dump (first 32 bytes): 00 20 3d 01 80 88 ff ff 00 20 3d 01 80 88 ff ff . =...... =..... f0 23 3d 01 80 88 ff ff 00 20 3d 01 00 00 00 00 .#=...... =..... backtrace: [<00000000970ce626>] __kmem_cache_alloc_node+0x20c/0x380 [<00000000fb5f78d9>] kmalloc_trace+0x2f/0xb0 [<00000000f451c5be>] alloc_scq.constprop.0+0x4a/0x400 [idt77252] [<00000000e6313849>] idt77252_init_one+0x28cf/0x3c90 [idt77252] The root cause is traced to the vc_maps which alloced in open_card_oam() are not freed in close_card_oam(). The vc_maps are used to record open connections, so when close a vc_map in close_card_oam(), the memory should be freed. Moreover, the ubr0 is not closed when close a idt77252 device, leading to the memory leak of vc_map and scq_info. Fix them by adding kfree in close_card_oam() and implementing new close_card_ubr0() to close ubr0. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Li Zetao Reviewed-by: Francois Romieu Link: https://lore.kernel.org/r/20230320143318.2644630-1-lizetao1@huawei.com Signed-off-by: Jakub Kicinski --- drivers/atm/idt77252.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index eec0cc2144e0..e327a0229dc1 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2909,6 +2909,7 @@ close_card_oam(struct idt77252_dev *card) recycle_rx_pool_skb(card, &vc->rcv.rx_pool); } + kfree(vc); } } } @@ -2952,6 +2953,15 @@ open_card_ubr0(struct idt77252_dev *card) return 0; } +static void +close_card_ubr0(struct idt77252_dev *card) +{ + struct vc_map *vc = card->vcs[0]; + + free_scq(card, vc->scq); + kfree(vc); +} + static int idt77252_dev_open(struct idt77252_dev *card) { @@ -3001,6 +3011,7 @@ static void idt77252_dev_close(struct atm_dev *dev) struct idt77252_dev *card = dev->dev_data; u32 conf; + close_card_ubr0(card); close_card_oam(card); conf = SAR_CFG_RXPTH | /* enable receive path */ -- cgit From 8e50ed774554f93d55426039b27b1e38d7fa64d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Mar 2023 16:34:27 +0000 Subject: erspan: do not use skb_mac_header() in ndo_start_xmit() Drivers should not assume skb_mac_header(skb) == skb->data in their ndo_start_xmit(). Use skb_network_offset() and skb_transport_offset() which better describe what is needed in erspan_fb_xmit() and ip6erspan_tunnel_xmit() syzbot reported: WARNING: CPU: 0 PID: 5083 at include/linux/skbuff.h:2873 skb_mac_header include/linux/skbuff.h:2873 [inline] WARNING: CPU: 0 PID: 5083 at include/linux/skbuff.h:2873 ip6erspan_tunnel_xmit+0x1d9c/0x2d90 net/ipv6/ip6_gre.c:962 Modules linked in: CPU: 0 PID: 5083 Comm: syz-executor406 Not tainted 6.3.0-rc2-syzkaller-00866-gd4671cb96fa3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 RIP: 0010:skb_mac_header include/linux/skbuff.h:2873 [inline] RIP: 0010:ip6erspan_tunnel_xmit+0x1d9c/0x2d90 net/ipv6/ip6_gre.c:962 Code: 04 02 41 01 de 84 c0 74 08 3c 03 0f 8e 1c 0a 00 00 45 89 b4 24 c8 00 00 00 c6 85 77 fe ff ff 01 e9 33 e7 ff ff e8 b4 27 a1 f8 <0f> 0b e9 b6 e7 ff ff e8 a8 27 a1 f8 49 8d bf f0 0c 00 00 48 b8 00 RSP: 0018:ffffc90003b2f830 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 000000000000ffff RCX: 0000000000000000 RDX: ffff888021273a80 RSI: ffffffff88e1bd4c RDI: 0000000000000003 RBP: ffffc90003b2f9d8 R08: 0000000000000003 R09: 000000000000ffff R10: 000000000000ffff R11: 0000000000000000 R12: ffff88802b28da00 R13: 00000000000000d0 R14: ffff88807e25b6d0 R15: ffff888023408000 FS: 0000555556a61300(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055e5b11eb6e8 CR3: 0000000027c1b000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __netdev_start_xmit include/linux/netdevice.h:4900 [inline] netdev_start_xmit include/linux/netdevice.h:4914 [inline] __dev_direct_xmit+0x504/0x730 net/core/dev.c:4300 dev_direct_xmit include/linux/netdevice.h:3088 [inline] packet_xmit+0x20a/0x390 net/packet/af_packet.c:285 packet_snd net/packet/af_packet.c:3075 [inline] packet_sendmsg+0x31a0/0x5150 net/packet/af_packet.c:3107 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0xde/0x190 net/socket.c:747 __sys_sendto+0x23a/0x340 net/socket.c:2142 __do_sys_sendto net/socket.c:2154 [inline] __se_sys_sendto net/socket.c:2150 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2150 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f123aaa1039 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc15d12058 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f123aaa1039 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000020000040 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f123aa648c0 R13: 431bde82d7b634db R14: 0000000000000000 R15: 0000000000000000 Fixes: 1baf5ebf8954 ("erspan: auto detect truncated packets.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230320163427.8096-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_gre.c | 4 ++-- net/ipv6/ip6_gre.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ffff46cdcb58..e55a20264960 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -552,7 +552,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -561,7 +561,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 89f5f0f3f5d6..a4ecfc9d2593 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -959,7 +959,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, truncate = true; } - nhoff = skb_network_header(skb) - skb_mac_header(skb); + nhoff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP) && (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) truncate = true; @@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, int thoff; if (skb_transport_header_was_set(skb)) - thoff = skb_transport_header(skb) - skb_mac_header(skb); + thoff = skb_transport_offset(skb); else thoff = nhoff + sizeof(struct ipv6hdr); if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff) -- cgit From 6acc72a43eac78a309160d0a7512bbc59bcdd757 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Mar 2023 03:03:23 +0200 Subject: net: mscc: ocelot: fix stats region batching The blamed commit changed struct ocelot_stat_layout :: "u32 offset" to "u32 reg". However, "u32 reg" is not quite a register address, but an enum ocelot_reg, which in itself encodes an enum ocelot_target target in the upper bits, and an index into the ocelot->map[target][] array in the lower bits. So, whereas the previous code comparison between stats_layout[i].offset and last + 1 was correct (because those "offsets" at the time were 32-bit relative addresses), the new code, comparing layout[i].reg to last + 4 is not correct, because the "reg" here is an enum/index, not an actual register address. What we want to compare are indeed register addresses, but to do that, we need to actually go through the same motions as __ocelot_bulk_read_ix() itself. With this bug, all statistics counters are deemed by ocelot_prepare_stats_regions() as constituting their own region. (Truncated) log on VSC9959 (Felix) below (prints added by me): Before: region of 1 contiguous counters starting with SYS:STAT:CNT[0x000] region of 1 contiguous counters starting with SYS:STAT:CNT[0x001] region of 1 contiguous counters starting with SYS:STAT:CNT[0x002] ... region of 1 contiguous counters starting with SYS:STAT:CNT[0x041] region of 1 contiguous counters starting with SYS:STAT:CNT[0x042] region of 1 contiguous counters starting with SYS:STAT:CNT[0x080] region of 1 contiguous counters starting with SYS:STAT:CNT[0x081] ... region of 1 contiguous counters starting with SYS:STAT:CNT[0x0ac] region of 1 contiguous counters starting with SYS:STAT:CNT[0x100] region of 1 contiguous counters starting with SYS:STAT:CNT[0x101] ... region of 1 contiguous counters starting with SYS:STAT:CNT[0x111] After: region of 67 contiguous counters starting with SYS:STAT:CNT[0x000] region of 45 contiguous counters starting with SYS:STAT:CNT[0x080] region of 18 contiguous counters starting with SYS:STAT:CNT[0x100] Since commit d87b1c08f38a ("net: mscc: ocelot: use bulk reads for stats") intended bulking as a performance improvement, and since now, with trivial-sized regions, performance is even worse than without bulking at all, this could easily qualify as a performance regression. Fixes: d4c367650704 ("net: mscc: ocelot: keep ocelot_stat_layout by reg address, not offset") Signed-off-by: Vladimir Oltean Acked-by: Colin Foster Tested-by: Colin Foster Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index bdb893476832..096c81ec9dd6 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -899,7 +899,8 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) if (!layout[i].reg) continue; - if (region && layout[i].reg == last + 4) { + if (region && ocelot->map[SYS][layout[i].reg & REG_MASK] == + ocelot->map[SYS][last & REG_MASK] + 4) { region->count++; } else { region = devm_kzalloc(ocelot->dev, sizeof(*region), -- cgit From 17dfd210459837b453c2a3c20406ee12082f151c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Mar 2023 03:03:24 +0200 Subject: net: mscc: ocelot: fix transfer from region->buf to ocelot->stats To understand the problem, we need some definitions. The driver is aware of multiple counters (enum ocelot_stat), yet not all switches supported by the driver implement all counters. There are 2 statistics layouts: ocelot_stats_layout and ocelot_mm_stats_layout, the latter having 36 counters more than the former. ocelot->stats[] is not a compact array, i.e. there are elements within it which are not going to be populated for ocelot_stats_layout. On the other hand, ocelot->stats[] is easily indexable, for example "tx_octets" for port 3 can be found at ocelot->stats[3 * OCELOT_NUM_STATS + OCELOT_STAT_TX_OCTETS], and that is why we keep it sparse. Regions, as created by ocelot_prepare_stats_regions(), are compact (every element from region->buf will correspond to a counter that is present in this switch's layout) but are not easily indexable. Let's define holes as the ranges of values of enum ocelot_stat for which ocelot_stats_layout doesn't have a "reg" defined. For example, there is a hole between OCELOT_STAT_RX_GREEN_PRIO_7 and OCELOT_STAT_TX_OCTETS which is of 23 elements that are only present on ocelot_mm_stats_layout, and as such, they are also present in enum ocelot_stat. Let's define the left extremity of the hole - the last enum ocelot_stat still defined - as A (in this case OCELOT_STAT_RX_GREEN_PRIO_7) and the right extremity - the first enum ocelot_stat that is defined after a series of undefined ones - as B (in this case OCELOT_STAT_TX_OCTETS). There is a bug in the procedure which transfers stats from region->buf[] to ocelot->stats[]. For each hole in the ocelot_stats_layout, the logic transfers the stats starting with enum ocelot_stat B to ocelot->stats[] index A + 1. So all stats after a hole are saved to a position which is off by B - A + 1 elements. This causes 2 kinds of issues: (a) counters which shouldn't increment increment (b) counters which should increment don't Holes in the ocelot_stat_layout automatically imply the end of a region and the beginning of a new one; however the reverse is not necessarily true. For example, for ocelot_mm_stat_layout, there could be multiple regions (which indicate discontinuities in register addresses) while there is no hole (which indicates discontinuities in enum ocelot_stat values). In the example above, the stats from the second region->buf[] are not transferred to ocelot->stats starting with index "port * OCELOT_NUM_STATS + OCELOT_STAT_TX_OCTETS" as they should, but rather, starting with element "port * OCELOT_NUM_STATS + OCELOT_STAT_RX_GREEN_PRIO_7 + 1". That stats[] array element is not reported to user space for switches that use ocelot_stat_layout, and that is how issue (b) occurs. However, if the length of the second region is larger than the hole, then some stats will start to be transferred to the ocelot->stats[] indices which *are* reported to user space, but those indices contain wrong values (corresponding to unexpected counters). This is how issue (a) occurs. The procedure, as it was introduced in commit d87b1c08f38a ("net: mscc: ocelot: use bulk reads for stats"), was not buggy, because there were no holes in the struct ocelot_stat_layout instances at that time. The problem is that when those holes were introduced, the function was not updated to take them into consideration. To update the procedure, we need to know, for each region, which enum ocelot_stat corresponds to its region->base. We have no way of deducing that based on the contents of struct ocelot_stats_region, so we need to add this information. Fixes: ab3f97a9610a ("net: mscc: ocelot: export ethtool MAC Merge stats for Felix VSC9959") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index 096c81ec9dd6..f18371154475 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -258,6 +258,7 @@ struct ocelot_stat_layout { struct ocelot_stats_region { struct list_head node; u32 base; + enum ocelot_stat first_stat; int count; u32 *buf; }; @@ -341,11 +342,12 @@ static int ocelot_port_update_stats(struct ocelot *ocelot, int port) */ static void ocelot_port_transfer_stats(struct ocelot *ocelot, int port) { - unsigned int idx = port * OCELOT_NUM_STATS; struct ocelot_stats_region *region; int j; list_for_each_entry(region, &ocelot->stats_regions, node) { + unsigned int idx = port * OCELOT_NUM_STATS + region->first_stat; + for (j = 0; j < region->count; j++) { u64 *stat = &ocelot->stats[idx + j]; u64 val = region->buf[j]; @@ -355,8 +357,6 @@ static void ocelot_port_transfer_stats(struct ocelot *ocelot, int port) *stat = (*stat & ~(u64)U32_MAX) + val; } - - idx += region->count; } } @@ -915,6 +915,7 @@ static int ocelot_prepare_stats_regions(struct ocelot *ocelot) WARN_ON(last >= layout[i].reg); region->base = layout[i].reg; + region->first_stat = i; region->count = 1; list_add_tail(®ion->node, &ocelot->stats_regions); } -- cgit From 5291099e0f61a4a1f4d2e11ac2af8123ece17e0e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 21 Mar 2023 03:03:25 +0200 Subject: net: mscc: ocelot: add TX_MM_HOLD to ocelot_mm_stats_layout The lack of a definition for this counter is what initially prompted me to investigate a problem which really manifested itself as the previous change, "net: mscc: ocelot: fix transfer from region->buf to ocelot->stats". When TX_MM_HOLD is defined in enum ocelot_stat but not in struct ocelot_stat_layout ocelot_mm_stats_layout, this creates a hole, which due to the aforementioned bug, makes all counters following TX_MM_HOLD be recorded off by one compared to their correct position. So for example, a non-zero TX_PMAC_OCTETS would be reported as TX_MERGE_FRAGMENTS, TX_PMAC_UNICAST would be reported as TX_PMAC_OCTETS, TX_PMAC_64 would be reported as TX_PMAC_PAUSE, etc etc. This is because the size of the hole (1) is much smaller than the size of the region, so the phenomenon where the stats are off-by-one, rather than lost, prevails. However, the phenomenon where stats are lost can be seen too, for example with DROP_LOCAL, which is at the beginning of its own region (offset 0x000400 vs the previous 0x0002b0 constitutes a discontinuity). This is also reported as off by one and saved to TX_PMAC_1527_MAX, but that counter is not reported to the unstructured "ethtool -S", as opposed to DROP_LOCAL which is (as "drop_local"). Fixes: ab3f97a9610a ("net: mscc: ocelot: export ethtool MAC Merge stats for Felix VSC9959") Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mscc/ocelot_stats.c b/drivers/net/ethernet/mscc/ocelot_stats.c index f18371154475..d0e6cd8dbe5c 100644 --- a/drivers/net/ethernet/mscc/ocelot_stats.c +++ b/drivers/net/ethernet/mscc/ocelot_stats.c @@ -274,6 +274,7 @@ static const struct ocelot_stat_layout ocelot_mm_stats_layout[OCELOT_NUM_STATS] OCELOT_STAT(RX_ASSEMBLY_OK), OCELOT_STAT(RX_MERGE_FRAGMENTS), OCELOT_STAT(TX_MERGE_FRAGMENTS), + OCELOT_STAT(TX_MM_HOLD), OCELOT_STAT(RX_PMAC_OCTETS), OCELOT_STAT(RX_PMAC_UNICAST), OCELOT_STAT(RX_PMAC_MULTICAST), -- cgit From 4107b8746d93ace135b8c4da4f19bbae81db785f Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 21 Mar 2023 14:45:43 +1100 Subject: net/sonic: use dma_mapping_error() for error check The DMA address returned by dma_map_single() should be checked with dma_mapping_error(). Fix it accordingly. Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Signed-off-by: Zhang Changzhong Tested-by: Stan Johnson Signed-off-by: Finn Thain Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/6645a4b5c1e364312103f48b7b36783b94e197a2.1679370343.git.fthain@linux-m68k.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/natsemi/sonic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index d17d1b4f2585..825356ee3492 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -292,7 +292,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) */ laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); - if (!laddr) { + if (dma_mapping_error(lp->device, laddr)) { pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -509,7 +509,7 @@ static bool sonic_alloc_rb(struct net_device *dev, struct sonic_local *lp, *new_addr = dma_map_single(lp->device, skb_put(*new_skb, SONIC_RBSIZE), SONIC_RBSIZE, DMA_FROM_DEVICE); - if (!*new_addr) { + if (dma_mapping_error(lp->device, *new_addr)) { dev_kfree_skb(*new_skb); *new_skb = NULL; return false; -- cgit From def84ab600b71ea3fcc422a876d5d0d0daa7d4f3 Mon Sep 17 00:00:00 2001 From: Martin George Date: Thu, 16 Mar 2023 17:20:09 +0530 Subject: nvme: send Identify with CNS 06h only to I/O controllers Identify CNS 06h (I/O Command Set Specific Identify Controller data structure) is supported only on i/o controllers. But nvme_init_non_mdts_limits() currently invokes this on all controllers. Correct this by ensuring this is sent to I/O controllers only. Signed-off-by: Martin George Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d4be525f8100..53ef028596c6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3063,7 +3063,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) else ctrl->max_zeroes_sectors = 0; - if (nvme_ctrl_limited_cns(ctrl)) + if (ctrl->subsys->subtype != NVME_NQN_NVME || + nvme_ctrl_limited_cns(ctrl)) return 0; id = kzalloc(sizeof(*id), GFP_KERNEL); -- cgit From aa01c67de5926fdb276793180564f172c55fb0d7 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Mon, 20 Mar 2023 09:57:36 -0600 Subject: nvme-tcp: fix nvme_tcp_term_pdu to match spec The FEI field of C2HTermReq/H2CTermReq is 4 bytes but not 4-byte-aligned in the NVMe/TCP specification (it is located at offset 10 in the PDU). Split it into two 16-bit integers in struct nvme_tcp_term_pdu so no padding is inserted. There should also be 10 reserved bytes after. There are currently no users of this type. Fixes: fc221d05447aa6db ("nvme-tcp: Add protocol header") Reported-by: Geert Uytterhoeven Signed-off-by: Caleb Sander Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme-tcp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 75470159a194..57ebe1267f7f 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -115,8 +115,9 @@ struct nvme_tcp_icresp_pdu { struct nvme_tcp_term_pdu { struct nvme_tcp_hdr hdr; __le16 fes; - __le32 fei; - __u8 rsvd[8]; + __le16 feil; + __le16 feiu; + __u8 rsvd[10]; }; /** -- cgit From 66a1c22b709178e7b823d44465d0c2e5ed7492fb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 21 Mar 2023 09:30:59 +0100 Subject: mm/slab: Fix undefined init_cache_node_node() for NUMA and !SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sh/migor_defconfig: mm/slab.c: In function ‘slab_memory_callback’: mm/slab.c:1127:23: error: implicit declaration of function ‘init_cache_node_node’; did you mean ‘drain_cache_node_node’? [-Werror=implicit-function-declaration] 1127 | ret = init_cache_node_node(nid); | ^~~~~~~~~~~~~~~~~~~~ | drain_cache_node_node The #ifdef condition protecting the definition of init_cache_node_node() no longer matches the conditions protecting the (multiple) users. Fix this by syncing the conditions. Fixes: 76af6a054da40553 ("mm/migrate: add CPU hotplug to demotion #ifdef") Reported-by: Randy Dunlap Link: https://lore.kernel.org/r/b5bdea22-ed2f-3187-6efe-0c72330270a4@infradead.org Signed-off-by: Geert Uytterhoeven Reviewed-by: John Paul Adrian Glaubitz Acked-by: Randy Dunlap Cc: Signed-off-by: Vlastimil Babka --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 74ece29e3a7e..0d35747b9472 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -839,7 +839,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp) return 0; } -#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP) +#if defined(CONFIG_NUMA) || defined(CONFIG_SMP) /* * Allocates and initializes node for a node on each slab cache, used for * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node -- cgit From b58e3d4311b54b6dd0e37165277965da0c9eb21d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Mar 2023 10:53:24 +0100 Subject: wifi: iwlwifi: mvm: fix mvmtxq->stopped handling This could race if the queue is redirected while full, then the flushing internally would start it while it's not yet usable again. Fix it by using two state bits instead of just one. Reviewed-by: Benjamin Berg Tested-by: Jose Ignacio Tornos Martinez Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 5 ++++- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 5 ++++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ++-- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 565522466eba..f81c609ecf58 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -732,7 +732,10 @@ void iwl_mvm_mac_itxq_xmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) rcu_read_lock(); do { - while (likely(!mvmtxq->stopped && + while (likely(!test_bit(IWL_MVM_TXQ_STATE_STOP_FULL, + &mvmtxq->state) && + !test_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, + &mvmtxq->state) && !test_bit(IWL_MVM_STATUS_IN_D3, &mvm->status))) { skb = ieee80211_tx_dequeue(hw, txq); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 90bc95d96a78..421d2649b0f0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -729,7 +729,9 @@ struct iwl_mvm_txq { struct list_head list; u16 txq_id; atomic_t tx_request; - bool stopped; +#define IWL_MVM_TXQ_STATE_STOP_FULL 0 +#define IWL_MVM_TXQ_STATE_STOP_REDIRECT 1 + unsigned long state; }; static inline struct iwl_mvm_txq * diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index f4e9446d9dc2..efad8f92d132 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1691,7 +1691,10 @@ static void iwl_mvm_queue_state_change(struct iwl_op_mode *op_mode, txq = sta->txq[tid]; mvmtxq = iwl_mvm_txq_from_mac80211(txq); - mvmtxq->stopped = !start; + if (start) + clear_bit(IWL_MVM_TXQ_STATE_STOP_FULL, &mvmtxq->state); + else + set_bit(IWL_MVM_TXQ_STATE_STOP_FULL, &mvmtxq->state); if (start && mvmsta->sta_state != IEEE80211_STA_NOTEXIST) iwl_mvm_mac_itxq_xmit(mvm->hw, txq); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 69634fb82a9b..21ad7b85c434 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -693,7 +693,7 @@ static int iwl_mvm_redirect_queue(struct iwl_mvm *mvm, int queue, int tid, queue, iwl_mvm_ac_to_tx_fifo[ac]); /* Stop the queue and wait for it to empty */ - txq->stopped = true; + set_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, &txq->state); ret = iwl_trans_wait_tx_queues_empty(mvm->trans, BIT(queue)); if (ret) { @@ -736,7 +736,7 @@ static int iwl_mvm_redirect_queue(struct iwl_mvm *mvm, int queue, int tid, out: /* Continue using the queue */ - txq->stopped = false; + clear_bit(IWL_MVM_TXQ_STATE_STOP_REDIRECT, &txq->state); return ret; } -- cgit From 923bf981eb6ecc027227716e30701bdcc1845fbf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Mar 2023 10:53:25 +0100 Subject: wifi: iwlwifi: mvm: protect TXQ list manipulation Some recent upstream debugging uncovered the fact that in iwlwifi, the TXQ list manipulation is racy. Introduce a new state bit for when the TXQ is completely ready and can be used without locking, and if that's not set yet acquire the lock to check everything correctly. Reviewed-by: Benjamin Berg Tested-by: Jose Ignacio Tornos Martinez Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 45 +++++++---------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 + drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 25 +++++++++++-- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index f81c609ecf58..b55b1b17f4d1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -760,42 +760,25 @@ static void iwl_mvm_mac_wake_tx_queue(struct ieee80211_hw *hw, struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); - /* - * Please note that racing is handled very carefully here: - * mvmtxq->txq_id is updated during allocation, and mvmtxq->list is - * deleted afterwards. - * This means that if: - * mvmtxq->txq_id != INVALID_QUEUE && list_empty(&mvmtxq->list): - * queue is allocated and we can TX. - * mvmtxq->txq_id != INVALID_QUEUE && !list_empty(&mvmtxq->list): - * a race, should defer the frame. - * mvmtxq->txq_id == INVALID_QUEUE && list_empty(&mvmtxq->list): - * need to allocate the queue and defer the frame. - * mvmtxq->txq_id == INVALID_QUEUE && !list_empty(&mvmtxq->list): - * queue is already scheduled for allocation, no need to allocate, - * should defer the frame. - */ - - /* If the queue is allocated TX and return. */ - if (!txq->sta || mvmtxq->txq_id != IWL_MVM_INVALID_QUEUE) { - /* - * Check that list is empty to avoid a race where txq_id is - * already updated, but the queue allocation work wasn't - * finished - */ - if (unlikely(txq->sta && !list_empty(&mvmtxq->list))) - return; - + if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) || + !txq->sta) { iwl_mvm_mac_itxq_xmit(hw, txq); return; } - /* The list is being deleted only after the queue is fully allocated. */ - if (!list_empty(&mvmtxq->list)) - return; + /* iwl_mvm_mac_itxq_xmit() will later be called by the worker + * to handle any packets we leave on the txq now + */ - list_add_tail(&mvmtxq->list, &mvm->add_stream_txqs); - schedule_work(&mvm->add_stream_wk); + spin_lock_bh(&mvm->add_stream_lock); + /* The list is being deleted only after the queue is fully allocated. */ + if (list_empty(&mvmtxq->list) && + /* recheck under lock */ + !test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) { + list_add_tail(&mvmtxq->list, &mvm->add_stream_txqs); + schedule_work(&mvm->add_stream_wk); + } + spin_unlock_bh(&mvm->add_stream_lock); } #define CHECK_BA_TRIGGER(_mvm, _trig, _tid_bm, _tid, _fmt...) \ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 421d2649b0f0..f307c345dfa0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -731,6 +731,7 @@ struct iwl_mvm_txq { atomic_t tx_request; #define IWL_MVM_TXQ_STATE_STOP_FULL 0 #define IWL_MVM_TXQ_STATE_STOP_REDIRECT 1 +#define IWL_MVM_TXQ_STATE_READY 2 unsigned long state; }; @@ -829,6 +830,7 @@ struct iwl_mvm { struct iwl_mvm_tvqm_txq_info tvqm_info[IWL_MAX_TVQM_QUEUES]; }; struct work_struct add_stream_wk; /* To add streams to queues */ + spinlock_t add_stream_lock; const char *nvm_file_name; struct iwl_nvm_data *nvm_data; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index efad8f92d132..9711841bb456 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1195,6 +1195,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, INIT_DELAYED_WORK(&mvm->scan_timeout_dwork, iwl_mvm_scan_timeout_wk); INIT_WORK(&mvm->add_stream_wk, iwl_mvm_add_new_dqa_stream_wk); INIT_LIST_HEAD(&mvm->add_stream_txqs); + spin_lock_init(&mvm->add_stream_lock); init_waitqueue_head(&mvm->rx_sync_waitq); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 21ad7b85c434..9caae77995ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -384,8 +384,11 @@ static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta, struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_tid(sta, tid); - mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_lock_bh(&mvm->add_stream_lock); list_del_init(&mvmtxq->list); + clear_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_unlock_bh(&mvm->add_stream_lock); } /* Regardless if this is a reserved TXQ for a STA - mark it as false */ @@ -479,8 +482,11 @@ static int iwl_mvm_remove_sta_queue_marking(struct iwl_mvm *mvm, int queue) disable_agg_tids |= BIT(tid); mvmsta->tid_data[tid].txq_id = IWL_MVM_INVALID_QUEUE; - mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_lock_bh(&mvm->add_stream_lock); list_del_init(&mvmtxq->list); + clear_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + spin_unlock_bh(&mvm->add_stream_lock); } mvmsta->tfd_queue_msk &= ~BIT(queue); /* Don't use this queue anymore */ @@ -1444,12 +1450,22 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) * a queue in the function itself. */ if (iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid)) { + spin_lock_bh(&mvm->add_stream_lock); list_del_init(&mvmtxq->list); + spin_unlock_bh(&mvm->add_stream_lock); continue; } - list_del_init(&mvmtxq->list); + /* now we're ready, any remaining races/concurrency will be + * handled in iwl_mvm_mac_itxq_xmit() + */ + set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + local_bh_disable(); + spin_lock(&mvm->add_stream_lock); + list_del_init(&mvmtxq->list); + spin_unlock(&mvm->add_stream_lock); + iwl_mvm_mac_itxq_xmit(mvm->hw, txq); local_bh_enable(); } @@ -1864,8 +1880,11 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm, struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(sta->txq[i]); + spin_lock_bh(&mvm->add_stream_lock); mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; list_del_init(&mvmtxq->list); + clear_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + spin_unlock_bh(&mvm->add_stream_lock); } } -- cgit From 4e348c6c6e23491ae6eb5e077848a42d0562339c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 14 Mar 2023 10:59:50 +0100 Subject: wifi: mac80211: fix qos on mesh interfaces When ieee80211_select_queue is called for mesh, the sta pointer is usually NULL, since the nexthop is looked up much later in the tx path. Explicitly check for unicast address in that case in order to make qos work again. Cc: stable@vger.kernel.org Fixes: 50e2ab392919 ("wifi: mac80211: fix queue selection for mesh/OCB interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230314095956.62085-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/wme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index a12c63638680..1601be576414 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -147,6 +147,7 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata, u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { + const struct ethhdr *eth = (void *)skb->data; struct mac80211_qos_map *qos_map; bool qos; @@ -154,8 +155,9 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, skb_get_hash(skb); /* all mesh/ocb stations are required to support WME */ - if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB)) + if ((sdata->vif.type == NL80211_IFTYPE_MESH_POINT && + !is_multicast_ether_addr(eth->h_dest)) || + (sdata->vif.type == NL80211_IFTYPE_OCB && sta)) qos = true; else if (sta) qos = sta->sta.wme; -- cgit From f355f70145744518ca1d9799b42f4a8da9aa0d36 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 14 Mar 2023 10:59:52 +0100 Subject: wifi: mac80211: fix mesh path discovery based on unicast packets If a packet has reached its intended destination, it was bumped to the code that accepts it, without first checking if a mesh_path needs to be created based on the discovered source. Fix this by moving the destination address check further down. Cc: stable@vger.kernel.org Fixes: 986e43b19ae9 ("wifi: mac80211: fix receiving A-MSDU frames on mesh interfaces") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20230314095956.62085-3-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f7fdfe710951..e8de500eb9f3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2765,17 +2765,6 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta mesh_rmc_check(sdata, eth->h_source, mesh_hdr)) return RX_DROP_MONITOR; - /* Frame has reached destination. Don't forward */ - if (ether_addr_equal(sdata->vif.addr, eth->h_dest)) - goto rx_accept; - - if (!ifmsh->mshcfg.dot11MeshForwarding) { - if (is_multicast_ether_addr(eth->h_dest)) - goto rx_accept; - - return RX_DROP_MONITOR; - } - /* forward packet */ if (sdata->crypto_tx_tailroom_needed_cnt) tailroom = IEEE80211_ENCRYPT_TAILROOM; @@ -2814,6 +2803,17 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta rcu_read_unlock(); } + /* Frame has reached destination. Don't forward */ + if (ether_addr_equal(sdata->vif.addr, eth->h_dest)) + goto rx_accept; + + if (!ifmsh->mshcfg.dot11MeshForwarding) { + if (is_multicast_ether_addr(eth->h_dest)) + goto rx_accept; + + return RX_DROP_MONITOR; + } + skb_set_queue_mapping(skb, ieee802_1d_to_ac[skb->priority]); ieee80211_fill_mesh_addresses(&hdr, &hdr.frame_control, -- cgit From caa0708a81d6a2217c942959ef40d515ec1d3108 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 28 Feb 2023 10:01:42 +0900 Subject: bootconfig: Change message if no bootconfig with CONFIG_BOOT_CONFIG_FORCE=y Change no bootconfig data error message if user do not specify 'bootconfig' option but CONFIG_BOOT_CONFIG_FORCE=y. With CONFIG_BOOT_CONFIG_FORCE=y, the kernel proceeds bootconfig check even if user does not specify 'bootconfig' option. So the current error message is confusing. Let's show just an information message to notice skipping the bootconfig in that case. Link: https://lore.kernel.org/all/167754610254.318944.16848412476667893329.stgit@devnote2/ Fixes: b743852ccc1d ("Allow forcing unconditional bootconfig processing") Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/CAMuHMdV9jJvE2y8gY5V_CxidUikCf5515QMZHzTA3rRGEOj6=w@mail.gmail.com/ Suggested-by: Paul E. McKenney Signed-off-by: Masami Hiramatsu (Google) Tested-by: Paul E. McKenney Acked-by: Mukesh Ojha --- init/main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index 4425d1783d5c..bb87b789c543 100644 --- a/init/main.c +++ b/init/main.c @@ -156,7 +156,7 @@ static char *extra_init_args; #ifdef CONFIG_BOOT_CONFIG /* Is bootconfig on command line? */ -static bool bootconfig_found = IS_ENABLED(CONFIG_BOOT_CONFIG_FORCE); +static bool bootconfig_found; static size_t initargs_offs; #else # define bootconfig_found false @@ -429,7 +429,7 @@ static void __init setup_boot_config(void) err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, bootconfig_params); - if (IS_ERR(err) || !bootconfig_found) + if (IS_ERR(err) || !(bootconfig_found || IS_ENABLED(CONFIG_BOOT_CONFIG_FORCE))) return; /* parse_args() stops at the next param of '--' and returns an address */ @@ -437,7 +437,11 @@ static void __init setup_boot_config(void) initargs_offs = err - tmp_cmdline; if (!data) { - pr_err("'bootconfig' found on command line, but no bootconfig found\n"); + /* If user intended to use bootconfig, show an error level message */ + if (bootconfig_found) + pr_err("'bootconfig' found on command line, but no bootconfig found\n"); + else + pr_info("No bootconfig data provided, so skipping bootconfig"); return; } -- cgit From c0e0421a60bf468e88cf569fbd727346b138ed04 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 17:52:33 +0100 Subject: ACPI: processor: Reorder acpi_processor_driver_init() The cpufreq policy notifier in the ACPI processor driver may as well be registered before the driver itself, which causes acpi_processor_cpufreq_init to be true (unless the notifier registration fails, which is unlikely at that point) when the ACPI CPU thermal cooling devices are registered, so the processor_get_max_state() result does not change while acpi_processor_driver_init() is running. Change the ordering in acpi_processor_driver_init() accordingly to prevent the max_state value from remaining 0 permanently for all ACPI CPU cooling devices due to setting acpi_processor_cpufreq_init too late. [Note that processor_get_max_state() may still return different values at different times after this change, depending on the cpufreq driver registration time, but that issue needs to be addressed separately.] Fixes: a365105c685c("thermal: sysfs: Reuse cdev->max_state") Reported-by: Wang, Quanxian Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/acpi/processor_driver.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 1278969eec1f..4bd16b3f0781 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -263,6 +263,12 @@ static int __init acpi_processor_driver_init(void) if (acpi_disabled) return 0; + if (!cpufreq_register_notifier(&acpi_processor_notifier_block, + CPUFREQ_POLICY_NOTIFIER)) { + acpi_processor_cpufreq_init = true; + acpi_processor_ignore_ppc_init(); + } + result = driver_register(&acpi_processor_driver); if (result < 0) return result; @@ -276,12 +282,6 @@ static int __init acpi_processor_driver_init(void) cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead", NULL, acpi_soft_cpu_dead); - if (!cpufreq_register_notifier(&acpi_processor_notifier_block, - CPUFREQ_POLICY_NOTIFIER)) { - acpi_processor_cpufreq_init = true; - acpi_processor_ignore_ppc_init(); - } - acpi_processor_throttling_init(); return 0; err: -- cgit From c43198af05cffa5de8d4f356c40ce4bdca066272 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 17:54:34 +0100 Subject: thermal: core: Introduce thermal_cooling_device_present() Introduce a helper function, thermal_cooling_device_present(), for checking if the given cooling device is in the list of registered cooling devices to avoid some code duplication in a subsequent patch. No expected functional impact. Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/thermal_core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 55679fd86505..8894342540f1 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1045,6 +1045,18 @@ devm_thermal_of_cooling_device_register(struct device *dev, } EXPORT_SYMBOL_GPL(devm_thermal_of_cooling_device_register); +static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev) +{ + struct thermal_cooling_device *pos = NULL; + + list_for_each_entry(pos, &thermal_cdev_list, node) { + if (pos == cdev) + return true; + } + + return false; +} + static void __unbind(struct thermal_zone_device *tz, int mask, struct thermal_cooling_device *cdev) { @@ -1067,20 +1079,17 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) int i; const struct thermal_zone_params *tzp; struct thermal_zone_device *tz; - struct thermal_cooling_device *pos = NULL; if (!cdev) return; mutex_lock(&thermal_list_lock); - list_for_each_entry(pos, &thermal_cdev_list, node) - if (pos == cdev) - break; - if (pos != cdev) { - /* thermal cooling device not found */ + + if (!thermal_cooling_device_present(cdev)) { mutex_unlock(&thermal_list_lock); return; } + list_del(&cdev->node); /* Unbind all thermal zones associated with 'this' cdev */ -- cgit From 790930f44289c8209c57461b2db499fcc702e0b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 18:01:26 +0100 Subject: thermal: core: Introduce thermal_cooling_device_update() Introduce a core thermal API function, thermal_cooling_device_update(), for updating the max_state value for a cooling device and rearranging its statistics in sysfs after a possible change of its ->get_max_state() callback return value. That callback is now invoked only once, during cooling device registration, to populate the max_state field in the cooling device object, so if its return value changes, it needs to be invoked again and the new return value needs to be stored as max_state. Moreover, the statistics presented in sysfs need to be rearranged in general, because there may not be enough room in them to store data for all of the possible states (in the case when max_state grows). The new function takes care of that (and some other minor things related to it), but some extra locking and lockdep annotations are added in several places too to protect against crashes in the cases when the statistics are not present or when a stale max_state value might be used by sysfs attributes. Note that the actual user of the new function will be added separately. Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com/ Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/thermal_core.c | 83 ++++++++++++++++++++++++++++++++++++++++- drivers/thermal/thermal_core.h | 2 + drivers/thermal/thermal_sysfs.c | 74 +++++++++++++++++++++++++++++++----- include/linux/thermal.h | 1 + 4 files changed, 150 insertions(+), 10 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 8894342540f1..cfd4c1afeae7 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -613,6 +613,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, struct thermal_instance *pos; struct thermal_zone_device *pos1; struct thermal_cooling_device *pos2; + bool upper_no_limit; int result; if (trip >= tz->num_trips || trip < 0) @@ -632,7 +633,13 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, /* lower default 0, upper default max_state */ lower = lower == THERMAL_NO_LIMIT ? 0 : lower; - upper = upper == THERMAL_NO_LIMIT ? cdev->max_state : upper; + + if (upper == THERMAL_NO_LIMIT) { + upper = cdev->max_state; + upper_no_limit = true; + } else { + upper_no_limit = false; + } if (lower > upper || upper > cdev->max_state) return -EINVAL; @@ -644,6 +651,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, dev->cdev = cdev; dev->trip = trip; dev->upper = upper; + dev->upper_no_limit = upper_no_limit; dev->lower = lower; dev->target = THERMAL_NO_TARGET; dev->weight = weight; @@ -1057,6 +1065,79 @@ static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev) return false; } +/** + * thermal_cooling_device_update - Update a cooling device object + * @cdev: Target cooling device. + * + * Update @cdev to reflect a change of the underlying hardware or platform. + * + * Must be called when the maximum cooling state of @cdev becomes invalid and so + * its .get_max_state() callback needs to be run to produce the new maximum + * cooling state value. + */ +void thermal_cooling_device_update(struct thermal_cooling_device *cdev) +{ + struct thermal_instance *ti; + unsigned long state; + + if (IS_ERR_OR_NULL(cdev)) + return; + + /* + * Hold thermal_list_lock throughout the update to prevent the device + * from going away while being updated. + */ + mutex_lock(&thermal_list_lock); + + if (!thermal_cooling_device_present(cdev)) + goto unlock_list; + + /* + * Update under the cdev lock to prevent the state from being set beyond + * the new limit concurrently. + */ + mutex_lock(&cdev->lock); + + if (cdev->ops->get_max_state(cdev, &cdev->max_state)) + goto unlock; + + thermal_cooling_device_stats_reinit(cdev); + + list_for_each_entry(ti, &cdev->thermal_instances, cdev_node) { + if (ti->upper == cdev->max_state) + continue; + + if (ti->upper < cdev->max_state) { + if (ti->upper_no_limit) + ti->upper = cdev->max_state; + + continue; + } + + ti->upper = cdev->max_state; + if (ti->lower > ti->upper) + ti->lower = ti->upper; + + if (ti->target == THERMAL_NO_TARGET) + continue; + + if (ti->target > ti->upper) + ti->target = ti->upper; + } + + if (cdev->ops->get_cur_state(cdev, &state) || state > cdev->max_state) + goto unlock; + + thermal_cooling_device_stats_update(cdev, state); + +unlock: + mutex_unlock(&cdev->lock); + +unlock_list: + mutex_unlock(&thermal_list_lock); +} +EXPORT_SYMBOL_GPL(thermal_cooling_device_update); + static void __unbind(struct thermal_zone_device *tz, int mask, struct thermal_cooling_device *cdev) { diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 7af54382e915..3d4a787c6b28 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -101,6 +101,7 @@ struct thermal_instance { struct list_head tz_node; /* node in tz->thermal_instances */ struct list_head cdev_node; /* node in cdev->thermal_instances */ unsigned int weight; /* The weight of the cooling device */ + bool upper_no_limit; }; #define to_thermal_zone(_dev) \ @@ -127,6 +128,7 @@ int thermal_zone_create_device_groups(struct thermal_zone_device *, int); void thermal_zone_destroy_device_groups(struct thermal_zone_device *); void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *); void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev); +void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev); /* used only at binding time */ ssize_t trip_point_show(struct device *, struct device_attribute *, char *); ssize_t weight_show(struct device *, struct device_attribute *, char *); diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index cef860deaf91..a4aba7b8bb8b 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -685,6 +685,8 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, { struct cooling_dev_stats *stats = cdev->stats; + lockdep_assert_held(&cdev->lock); + if (!stats) return; @@ -706,13 +708,22 @@ static ssize_t total_trans_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; - int ret; + struct cooling_dev_stats *stats; + int ret = 0; + + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; spin_lock(&stats->lock); ret = sprintf(buf, "%u\n", stats->total_trans); spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return ret; } @@ -721,11 +732,18 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; + struct cooling_dev_stats *stats; ssize_t len = 0; int i; + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; + spin_lock(&stats->lock); + update_time_in_state(stats); for (i = 0; i <= cdev->max_state; i++) { @@ -734,6 +752,9 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, } spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return len; } @@ -742,8 +763,16 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; - int i, states = cdev->max_state + 1; + struct cooling_dev_stats *stats; + int i, states; + + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; + + states = cdev->max_state + 1; spin_lock(&stats->lock); @@ -757,6 +786,9 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return count; } @@ -764,10 +796,18 @@ static ssize_t trans_table_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; + struct cooling_dev_stats *stats; ssize_t len = 0; int i, j; + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) { + len = -ENODATA; + goto unlock; + } + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i <= cdev->max_state; i++) { @@ -775,8 +815,10 @@ static ssize_t trans_table_show(struct device *dev, break; len += snprintf(buf + len, PAGE_SIZE - len, "state%2u ", i); } - if (len >= PAGE_SIZE) - return PAGE_SIZE; + if (len >= PAGE_SIZE) { + len = PAGE_SIZE; + goto unlock; + } len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -799,8 +841,12 @@ static ssize_t trans_table_show(struct device *dev, if (len >= PAGE_SIZE) { pr_warn_once("Thermal transition table exceeds PAGE_SIZE. Disabling\n"); - return -EFBIG; + len = -EFBIG; } + +unlock: + mutex_unlock(&cdev->lock); + return len; } @@ -830,6 +876,8 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) unsigned long states = cdev->max_state + 1; int var; + lockdep_assert_held(&cdev->lock); + var = sizeof(*stats); var += sizeof(*stats->time_in_state) * states; var += sizeof(*stats->trans_table) * states * states; @@ -855,6 +903,8 @@ out: static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev) { + lockdep_assert_held(&cdev->lock); + kfree(cdev->stats); cdev->stats = NULL; } @@ -879,6 +929,12 @@ void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev) cooling_device_stats_destroy(cdev); } +void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev) +{ + cooling_device_stats_destroy(cdev); + cooling_device_stats_setup(cdev); +} + /* these helper will be used only at the time of bindig */ ssize_t trip_point_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 2bb4bf33f4f3..13c6aaed18df 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -384,6 +384,7 @@ devm_thermal_of_cooling_device_register(struct device *dev, struct device_node *np, char *type, void *devdata, const struct thermal_cooling_device_ops *ops); +void thermal_cooling_device_update(struct thermal_cooling_device *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); -- cgit From 22c52fa5155a2f48aedb0f675903b20457285a27 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 18:03:40 +0100 Subject: ACPI: processor: thermal: Update CPU cooling devices on cpufreq policy changes When a cpufreq policy appears or goes away, the CPU cooling devices for the CPUs covered by that policy need to be updated so that the new processor_get_max_state() value is stored as max_state and the statistics in sysfs are rearranged for each of them. Do that accordingly in acpi_thermal_cpufreq_init() and acpi_thermal_cpufreq_exit(). Fixes: a365105c685c("thermal: sysfs: Reuse cdev->max_state") Reported-by: Wang, Quanxian Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/acpi/processor_thermal.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index e534fd49a67e..b7c6287eccca 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -140,9 +140,13 @@ void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy) ret = freq_qos_add_request(&policy->constraints, &pr->thermal_req, FREQ_QOS_MAX, INT_MAX); - if (ret < 0) + if (ret < 0) { pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu, ret); + continue; + } + + thermal_cooling_device_update(pr->cdev); } } @@ -153,8 +157,12 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy) for_each_cpu(cpu, policy->related_cpus) { struct acpi_processor *pr = per_cpu(processors, cpu); - if (pr) - freq_qos_remove_request(&pr->thermal_req); + if (!pr) + continue; + + freq_qos_remove_request(&pr->thermal_req); + + thermal_cooling_device_update(pr->cdev); } } #else /* ! CONFIG_CPU_FREQ */ -- cgit From b8838e653034425cd26983c7d96535e2742a6212 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:13 +0100 Subject: arm64: dts: qcom: sc8280xp-x13s: mark s11b regulator as always-on The s11b supply is used by the wlan module (as well as some of the pmics) which are not yet fully described in the devicetree. Mark the regulator as always-on for now. Fixes: 123b30a75623 ("arm64: dts: qcom: sc8280xp-x13s: enable WiFi controller") Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-2-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index fa412bea8985..532859363a22 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -377,6 +377,7 @@ regulator-min-microvolt = <1272000>; regulator-max-microvolt = <1272000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_s12b: smps12 { -- cgit From f4472fd33e4751c54efb13d2536261e9273770a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:14 +0100 Subject: arm64: dts: qcom: sc8280xp-x13s: mark s10b regulator as always-on The s10b supply is used by several components that are not (yet) described in devicetree (e.g. ram, charger, ec) and must not be disabled. Mark the regulator as always-on. Fixes: f29077d86652 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-3-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index 532859363a22..d6f6feee635e 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -370,6 +370,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_s11b: smps11 { -- cgit From 291e6b6cd7145108d45aeffbac4bb1348fece6b6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:15 +0100 Subject: arm64: dts: qcom: sc8280xp-x13s: mark s12b regulator as always-on The s12b supply is used by several pmic regulators as well as the wlan/bluetooth radio which are not yet fully described in the devicetree. Mark the regulator as always-on for now. Fixes: f29077d86652 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-4-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index d6f6feee635e..f327bc72463f 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -386,6 +386,7 @@ regulator-min-microvolt = <984000>; regulator-max-microvolt = <984000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l3b: ldo3 { -- cgit From 07b0883e1f09416d07d25a2158f8cd35b732b686 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Mar 2023 12:33:16 +0100 Subject: arm64: dts: qcom: sc8280xp-x13s: mark bob regulator as always-on The bob supply is used by several pmic regulators and components which are not (yet fully) described in the devicetree. Mark the regulator as always-on for now. Fixes: f29077d86652 ("arm64: dts: qcom: sc8280xp-x13s: Add soundcard support") Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230322113318.17908-5-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index f327bc72463f..99c6d6574559 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -444,6 +444,7 @@ regulator-min-microvolt = <3008000>; regulator-max-microvolt = <3960000>; regulator-initial-mode = ; + regulator-always-on; }; }; -- cgit From bb765a743377d46d8da8e7f7e5128022504741b9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 21 Mar 2023 12:42:00 +0100 Subject: mlxsw: spectrum_fid: Fix incorrect local port type Local port is a 10-bit number, but it was mistakenly stored in a u8, resulting in firmware errors when using a netdev corresponding to a local port higher than 255. Fix by storing the local port in u16, as is done in the rest of the code. Fixes: bf73904f5fba ("mlxsw: Add support for 802.1Q FID family") Signed-off-by: Ido Schimmel Reviewed-by: Danielle Ratson Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/eace1f9d96545ab8a2775db857cb7e291a9b166b.1679398549.git.petrm@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 045a24cacfa5..b6ee2d658b0c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -1354,7 +1354,7 @@ static int mlxsw_sp_fid_8021q_port_vid_map(struct mlxsw_sp_fid *fid, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; + u16 local_port = mlxsw_sp_port->local_port; int err; /* In case there are no {Port, VID} => FID mappings on the port, @@ -1391,7 +1391,7 @@ mlxsw_sp_fid_8021q_port_vid_unmap(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u8 local_port = mlxsw_sp_port->local_port; + u16 local_port = mlxsw_sp_port->local_port; mlxsw_sp_fid_port_vid_list_del(fid, mlxsw_sp_port->local_port, vid); mlxsw_sp_fid_evid_map(fid, local_port, vid, false); -- cgit From 6214894f49a967c749ee6c07cb00f9cede748df4 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 30 Nov 2022 16:09:11 +0100 Subject: hvc/xen: prevent concurrent accesses to the shared ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hvc machinery registers both a console and a tty device based on the hv ops provided by the specific implementation. Those two interfaces however have different locks, and there's no single locks that's shared between the tty and the console implementations, hence the driver needs to protect itself against concurrent accesses. Otherwise concurrent calls using the split interfaces are likely to corrupt the ring indexes, leaving the console unusable. Introduce a lock to xencons_info to serialize accesses to the shared ring. This is only required when using the shared memory console, concurrent accesses to the hypercall based console implementation are not an issue. Note the conditional logic in domU_read_console() is slightly modified so the notify_daemon() call can be done outside of the locked region: it's an hypercall and there's no need for it to be done with the lock held. Fixes: b536b4b96230 ('xen: use the hvc console infrastructure for Xen console') Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20221130150919.13935-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/tty/hvc/hvc_xen.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 5bddb2f5e931..98764e740c07 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -43,6 +43,7 @@ struct xencons_info { int irq; int vtermno; grant_ref_t gntref; + spinlock_t ring_lock; }; static LIST_HEAD(xenconsoles); @@ -89,12 +90,15 @@ static int __write_console(struct xencons_info *xencons, XENCONS_RING_IDX cons, prod; struct xencons_interface *intf = xencons->intf; int sent = 0; + unsigned long flags; + spin_lock_irqsave(&xencons->ring_lock, flags); cons = intf->out_cons; prod = intf->out_prod; mb(); /* update queue values before going on */ if ((prod - cons) > sizeof(intf->out)) { + spin_unlock_irqrestore(&xencons->ring_lock, flags); pr_err_once("xencons: Illegal ring page indices"); return -EINVAL; } @@ -104,6 +108,7 @@ static int __write_console(struct xencons_info *xencons, wmb(); /* write ring before updating pointer */ intf->out_prod = prod; + spin_unlock_irqrestore(&xencons->ring_lock, flags); if (sent) notify_daemon(xencons); @@ -146,16 +151,19 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); unsigned int eoiflag = 0; + unsigned long flags; if (xencons == NULL) return -EINVAL; intf = xencons->intf; + spin_lock_irqsave(&xencons->ring_lock, flags); cons = intf->in_cons; prod = intf->in_prod; mb(); /* get pointers before reading ring */ if ((prod - cons) > sizeof(intf->in)) { + spin_unlock_irqrestore(&xencons->ring_lock, flags); pr_err_once("xencons: Illegal ring page indices"); return -EINVAL; } @@ -179,10 +187,13 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) xencons->out_cons = intf->out_cons; xencons->out_cons_same = 0; } + if (!recv && xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + spin_unlock_irqrestore(&xencons->ring_lock, flags); + if (recv) { notify_daemon(xencons); - } else if (xencons->out_cons_same++ > 1) { - eoiflag = XEN_EOI_FLAG_SPURIOUS; } xen_irq_lateeoi(xencons->irq, eoiflag); @@ -239,6 +250,7 @@ static int xen_hvm_console_init(void) info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; + spin_lock_init(&info->ring_lock); } else if (info->intf != NULL) { /* already configured */ return 0; @@ -275,6 +287,7 @@ err: static int xencons_info_pv_init(struct xencons_info *info, int vtermno) { + spin_lock_init(&info->ring_lock); info->evtchn = xen_start_info->console.domU.evtchn; /* GFN == MFN for PV guest */ info->intf = gfn_to_virt(xen_start_info->console.domU.mfn); @@ -325,6 +338,7 @@ static int xen_initial_domain_console_init(void) info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; + spin_lock_init(&info->ring_lock); } info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); @@ -482,6 +496,7 @@ static int xencons_probe(struct xenbus_device *dev, info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL); if (!info) return -ENOMEM; + spin_lock_init(&info->ring_lock); dev_set_drvdata(&dev->dev, info); info->xbdev = dev; info->vtermno = xenbus_devid_to_vtermno(devid); -- cgit From aadbd07ff8a75ed342388846da78dfaddb8b106a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 21 Mar 2023 09:03:26 +0100 Subject: x86/PVH: avoid 32-bit build warning when obtaining VGA console info In the commit referenced below I failed to pay attention to this code also being buildable as 32-bit. Adjust the type of "ret" - there's no real need for it to be wider than 32 bits. Fixes: 934ef33ee75c ("x86/PVH: obtain VGA console info in Dom0") Reported-by: kernel test robot Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/2d2193ff-670b-0a27-e12d-2c5c4c121c79@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pvh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 1da44aca896c..ada3868c02c2 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -48,7 +48,7 @@ void __init xen_pvh_init(struct boot_params *boot_params) struct xen_platform_op op = { .cmd = XENPF_get_dom0_console, }; - long ret = HYPERVISOR_platform_op(&op); + int ret = HYPERVISOR_platform_op(&op); if (ret > 0) xen_init_vga(&op.u.dom0_console, -- cgit From 915efd8a446b74442039d31689d5d863caf82517 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 21 Mar 2023 14:52:31 +0100 Subject: xdp: bpf_xdp_metadata use EOPNOTSUPP for no driver support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When driver doesn't implement a bpf_xdp_metadata kfunc the fallback implementation returns EOPNOTSUPP, which indicate device driver doesn't implement this kfunc. Currently many drivers also return EOPNOTSUPP when the hint isn't available, which is ambiguous from an API point of view. Instead change drivers to return ENODATA in these cases. There can be natural cases why a driver doesn't provide any hardware info for a specific hint, even on a frame to frame basis (e.g. PTP). Lets keep these cases as separate return codes. When describing the return values, adjust the function kernel-doc layout to get proper rendering for the return values. Fixes: ab46182d0dcb ("net/mlx4_en: Support RX XDP metadata") Fixes: bc8d405b1ba9 ("net/mlx5e: Support RX XDP metadata") Fixes: 306531f0249f ("veth: Support RX XDP metadata") Fixes: 3d76a4d3d4e5 ("bpf: XDP metadata RX kfuncs") Signed-off-by: Jesper Dangaard Brouer Acked-by: Stanislav Fomichev Acked-by: Toke Høiland-Jørgensen Acked-by: Tariq Toukan Link: https://lore.kernel.org/r/167940675120.2718408.8176058626864184420.stgit@firesoul Signed-off-by: Alexei Starovoitov --- Documentation/networking/xdp-rx-metadata.rst | 7 +++++-- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 ++-- drivers/net/veth.c | 4 ++-- net/core/xdp.c | 10 ++++++++-- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index aac63fc2d08b..25ce72af81c2 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -23,10 +23,13 @@ metadata is supported, this set will grow: An XDP program can use these kfuncs to read the metadata into stack variables for its own consumption. Or, to pass the metadata on to other consumers, an XDP program can store it into the metadata area carried -ahead of the packet. +ahead of the packet. Not all packets will necessary have the requested +metadata available in which case the driver returns ``-ENODATA``. Not all kfuncs have to be implemented by the device driver; when not -implemented, the default ones that return ``-EOPNOTSUPP`` will be used. +implemented, the default ones that return ``-EOPNOTSUPP`` will be used +to indicate the device driver have not implemented this kfunc. + Within an XDP frame, the metadata layout (accessed via ``xdp_buff``) is as follows:: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 0869d4fff17b..4b5e459b6d49 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -674,7 +674,7 @@ int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) struct mlx4_en_xdp_buff *_ctx = (void *)ctx; if (unlikely(_ctx->ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL)) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = mlx4_en_get_hwtstamp(_ctx->mdev, mlx4_en_get_cqe_ts(_ctx->cqe)); @@ -686,7 +686,7 @@ int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) struct mlx4_en_xdp_buff *_ctx = (void *)ctx; if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH))) - return -EOPNOTSUPP; + return -ENODATA; *hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index bcd6370de440..c5dae48b7932 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -162,7 +162,7 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) const struct mlx5e_xdp_buff *_ctx = (void *)ctx; if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp))) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time, _ctx->rq->clock, get_cqe_ts(_ctx->cqe)); @@ -174,7 +174,7 @@ static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) const struct mlx5e_xdp_buff *_ctx = (void *)ctx; if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))) - return -EOPNOTSUPP; + return -ENODATA; *hash = be32_to_cpu(_ctx->cqe->rss_hash_result); return 0; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1bb54de7124d..046461ee42ea 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1610,7 +1610,7 @@ static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) struct veth_xdp_buff *_ctx = (void *)ctx; if (!_ctx->skb) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; return 0; @@ -1621,7 +1621,7 @@ static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) struct veth_xdp_buff *_ctx = (void *)ctx; if (!_ctx->skb) - return -EOPNOTSUPP; + return -ENODATA; *hash = skb_get_hash(_ctx->skb); return 0; diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc553317..247797168579 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -720,7 +720,10 @@ __diag_ignore_all("-Wmissing-prototypes", * @ctx: XDP context pointer. * @timestamp: Return value pointer. * - * Returns 0 on success or ``-errno`` on error. + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver does not implement kfunc + * * ``-ENODATA`` : means no RX-timestamp available for this frame */ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) { @@ -732,7 +735,10 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim * @ctx: XDP context pointer. * @hash: Return value pointer. * - * Returns 0 on success or ``-errno`` on error. + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc + * * ``-ENODATA`` : means no RX-hash available for this frame */ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash) { -- cgit From ba98413bf45edbf33672e2539e321b851b2cfbd1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 11:35:33 +0100 Subject: drm/meson: fix missing component unbind on bind errors Make sure to unbind all subcomponents when binding the aggregate device fails. Fixes: a41e82e6c457 ("drm/meson: Add support for components") Cc: stable@vger.kernel.org # 4.12 Cc: Neil Armstrong Signed-off-by: Johan Hovold Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230306103533.4915-1-johan+linaro@kernel.org --- drivers/gpu/drm/meson/meson_drv.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 79bfe3938d3c..7caf937c3c90 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -325,23 +325,23 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) ret = meson_encoder_hdmi_init(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = meson_plane_create(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = meson_overlay_create(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = meson_crtc_create(priv); if (ret) - goto exit_afbcd; + goto unbind_all; ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) - goto exit_afbcd; + goto unbind_all; drm_mode_config_reset(drm); @@ -359,6 +359,9 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) uninstall_irq: free_irq(priv->vsync_irq, drm); +unbind_all: + if (has_components) + component_unbind_all(drm->dev, drm); exit_afbcd: if (priv->afbcd.ops) priv->afbcd.ops->exit(priv); -- cgit From 1a70ca89d59c7c8af006d29b965a95ede0abb0da Mon Sep 17 00:00:00 2001 From: Matheus Castello Date: Wed, 22 Mar 2023 15:38:21 +0100 Subject: drm/bridge: lt8912b: return EPROBE_DEFER if bridge is not found Returns EPROBE_DEFER when of_drm_find_bridge() fails, this is consistent with what all the other DRM bridge drivers are doing and this is required since the bridge might not be there when the driver is probed and this should not be a fatal failure. Cc: Fixes: 30e2ae943c26 ("drm/bridge: Introduce LT8912B DSI to HDMI bridge") Signed-off-by: Matheus Castello Signed-off-by: Francesco Dolcini Reviewed-by: Laurent Pinchart Reviewed-by: Andrzej Hajda Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230322143821.109744-1-francesco@dolcini.it --- drivers/gpu/drm/bridge/lontium-lt8912b.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index 2019a8167d69..b40baced1331 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -676,8 +676,8 @@ static int lt8912_parse_dt(struct lt8912 *lt) lt->hdmi_port = of_drm_find_bridge(port_node); if (!lt->hdmi_port) { - dev_err(lt->dev, "%s: Failed to get hdmi port\n", __func__); - ret = -ENODEV; + ret = -EPROBE_DEFER; + dev_err_probe(lt->dev, ret, "%s: Failed to get hdmi port\n", __func__); goto err_free_host_node; } -- cgit From 02a4d923e4400a36d340ea12d8058f69ebf3a383 Mon Sep 17 00:00:00 2001 From: Savino Dicanosa Date: Tue, 21 Mar 2023 19:44:02 +0000 Subject: io_uring/rsrc: fix null-ptr-deref in io_file_bitmap_get() When fixed files are unregistered, file_alloc_end and alloc_hint are not cleared. This can later cause a NULL pointer dereference in io_file_bitmap_get() if auto index selection is enabled via IORING_FILE_INDEX_ALLOC: [ 6.519129] BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] [ 6.541468] RIP: 0010:_find_next_zero_bit+0x1a/0x70 [...] [ 6.560906] Call Trace: [ 6.561322] [ 6.561672] io_file_bitmap_get+0x38/0x60 [ 6.562281] io_fixed_fd_install+0x63/0xb0 [ 6.562851] ? __pfx_io_socket+0x10/0x10 [ 6.563396] io_socket+0x93/0xf0 [ 6.563855] ? __pfx_io_socket+0x10/0x10 [ 6.564411] io_issue_sqe+0x5b/0x3d0 [ 6.564914] io_submit_sqes+0x1de/0x650 [ 6.565452] __do_sys_io_uring_enter+0x4fc/0xb20 [ 6.566083] ? __do_sys_io_uring_register+0x11e/0xd80 [ 6.566779] do_syscall_64+0x3c/0x90 [ 6.567247] entry_SYSCALL_64_after_hwframe+0x72/0xdc [...] To fix the issue, set file alloc range and alloc_hint to zero after file tables are freed. Cc: stable@vger.kernel.org Fixes: 4278a0deb1f6 ("io_uring: defer alloc_hint update to io_file_bitmap_set()") Signed-off-by: Savino Dicanosa [axboe: add explicit bitmap == NULL check as well] Signed-off-by: Jens Axboe --- io_uring/filetable.c | 3 +++ io_uring/rsrc.c | 1 + 2 files changed, 4 insertions(+) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 68dfc6936aa7..b80614e7d605 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -19,6 +19,9 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx) unsigned long nr = ctx->file_alloc_end; int ret; + if (!table->bitmap) + return -ENFILE; + do { ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint); if (ret != nr) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index e2bac9f89902..7a43aed8e395 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -794,6 +794,7 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) } #endif io_free_file_tables(&ctx->file_table); + io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); ctx->file_data = NULL; ctx->nr_user_files = 0; -- cgit From a3f547addcaa10df5a226526bc9e2d9a94542344 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Mon, 6 Mar 2023 20:31:44 +0100 Subject: x86/mm: Do not shuffle CPU entry areas without KASLR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area") fixed an omission of KASLR on CPU entry areas. It doesn't take into account KASLR switches though, which may result in unintended non-determinism when a user wants to avoid it (e.g. debugging, benchmarking). Generate only a single combination of CPU entry areas offsets -- the linear array that existed prior randomization when KASLR is turned off. Since we have 3f148f331814 ("x86/kasan: Map shadow for percpu pages on demand") and followups, we can use the more relaxed guard kasrl_enabled() (in contrast to kaslr_memory_enabled()). Fixes: 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area") Signed-off-by: Michal Koutný Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230306193144.24605-1-mkoutny%40suse.com --- arch/x86/mm/cpu_entry_area.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 7316a8224259..e91500a80963 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -10,6 +10,7 @@ #include #include #include +#include static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); @@ -29,6 +30,12 @@ static __init void init_cea_offsets(void) unsigned int max_cea; unsigned int i, j; + if (!kaslr_enabled()) { + for_each_possible_cpu(i) + per_cpu(_cea_offset, i) = i; + return; + } + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; /* O(sodding terrible) */ -- cgit From b15888840207c2bfe678dd1f68a32db54315e71f Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Mon, 27 Feb 2023 13:05:03 -0800 Subject: x86/fpu/xstate: Prevent false-positive warning in __copy_xstate_uabi_buf() __copy_xstate_to_uabi_buf() copies either from the tasks XSAVE buffer or from init_fpstate into the ptrace buffer. Dynamic features, like XTILEDATA, have an all zeroes init state and are not saved in init_fpstate, which means the corresponding bit is not set in the xfeatures bitmap of the init_fpstate header. But __copy_xstate_to_uabi_buf() retrieves addresses for both the tasks xstate and init_fpstate unconditionally via __raw_xsave_addr(). So if the tasks XSAVE buffer has a dynamic feature set, then the address retrieval for init_fpstate triggers the warning in __raw_xsave_addr() which checks the feature bit in the init_fpstate header. Remove the address retrieval from init_fpstate for extended features. They have an all zeroes init state so init_fpstate has zeros for them. Then zeroing the user buffer for the init state is the same as copying them from init_fpstate. Fixes: 2308ee57d93d ("x86/fpu/amx: Enable the AMX feature in 64-bit mode") Reported-by: Mingwei Zhang Link: https://lore.kernel.org/kvm/20230221163655.920289-2-mizhang@google.com/ Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Tested-by: Mingwei Zhang Link: https://lore.kernel.org/all/20230227210504.18520-2-chang.seok.bae%40intel.com Cc: stable@vger.kernel.org --- arch/x86/kernel/fpu/xstate.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 714166cc25f2..0bab497c9436 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1118,21 +1118,20 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, zerofrom = offsetof(struct xregs_state, extended_state_area); /* - * The ptrace buffer is in non-compacted XSAVE format. In - * non-compacted format disabled features still occupy state space, - * but there is no state to copy from in the compacted - * init_fpstate. The gap tracking will zero these states. - */ - mask = fpstate->user_xfeatures; - - /* - * Dynamic features are not present in init_fpstate. When they are - * in an all zeros init state, remove those from 'mask' to zero - * those features in the user buffer instead of retrieving them - * from init_fpstate. + * This 'mask' indicates which states to copy from fpstate. + * Those extended states that are not present in fpstate are + * either disabled or initialized: + * + * In non-compacted format, disabled features still occupy + * state space but there is no state to copy from in the + * compacted init_fpstate. The gap tracking will zero these + * states. + * + * The extended features have an all zeroes init state. Thus, + * remove them from 'mask' to zero those features in the user + * buffer instead of retrieving them from init_fpstate. */ - if (fpu_state_size_dynamic()) - mask &= (header.xfeatures | xinit->header.xcomp_bv); + mask = header.xfeatures; for_each_extended_xfeature(i, mask) { /* @@ -1151,9 +1150,8 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, pkru.pkru = pkru_val; membuf_write(&to, &pkru, sizeof(pkru)); } else { - copy_feature(header.xfeatures & BIT_ULL(i), &to, + membuf_write(&to, __raw_xsave_addr(xsave, i), - __raw_xsave_addr(xinit, i), xstate_sizes[i]); } /* -- cgit From 62faca1ca10cc84e99ae7f38aa28df2bc945369b Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Mon, 27 Feb 2023 13:05:04 -0800 Subject: selftests/x86/amx: Add a ptrace test Include a test case to validate the XTILEDATA injection to the target. Also, it ensures the kernel's ability to copy states between different XSAVE formats. Refactor the memcmp() code to be usable for the state validation. Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230227210504.18520-3-chang.seok.bae%40intel.com --- tools/testing/selftests/x86/amx.c | 108 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 625e42901237..d884fd69dd51 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -14,8 +14,10 @@ #include #include #include +#include #include #include +#include #include "../kselftest.h" /* For __cpuid_count() */ @@ -583,6 +585,13 @@ static void test_dynamic_state(void) _exit(0); } +static inline int __compare_tiledata_state(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2) +{ + return memcmp(&xbuf1->bytes[xtiledata.xbuf_offset], + &xbuf2->bytes[xtiledata.xbuf_offset], + xtiledata.size); +} + /* * Save current register state and compare it to @xbuf1.' * @@ -599,9 +608,7 @@ static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1) fatal_error("failed to allocate XSAVE buffer\n"); xsave(xbuf2, XFEATURE_MASK_XTILEDATA); - ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset], - &xbuf2->bytes[xtiledata.xbuf_offset], - xtiledata.size); + ret = __compare_tiledata_state(xbuf1, xbuf2); free(xbuf2); @@ -826,6 +833,99 @@ static void test_context_switch(void) free(finfo); } +/* Ptrace test */ + +/* + * Make sure the ptracee has the expanded kernel buffer on the first + * use. Then, initialize the state before performing the state + * injection from the ptracer. + */ +static inline void ptracee_firstuse_tiledata(void) +{ + load_rand_tiledata(stashed_xsave); + init_xtiledata(); +} + +/* + * Ptracer injects the randomized tile data state. It also reads + * before and after that, which will execute the kernel's state copy + * functions. So, the tester is advised to double-check any emitted + * kernel messages. + */ +static void ptracer_inject_tiledata(pid_t target) +{ + struct xsave_buffer *xbuf; + struct iovec iov; + + xbuf = alloc_xbuf(); + if (!xbuf) + fatal_error("unable to allocate XSAVE buffer"); + + printf("\tRead the init'ed tiledata via ptrace().\n"); + + iov.iov_base = xbuf; + iov.iov_len = xbuf_size; + + memset(stashed_xsave, 0, xbuf_size); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_GETREGSET"); + + if (!__compare_tiledata_state(stashed_xsave, xbuf)) + printf("[OK]\tThe init'ed tiledata was read from ptracee.\n"); + else + printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n"); + + printf("\tInject tiledata via ptrace().\n"); + + load_rand_tiledata(xbuf); + + memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset], + &xbuf->bytes[xtiledata.xbuf_offset], + xtiledata.size); + + if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_SETREGSET"); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_GETREGSET"); + + if (!__compare_tiledata_state(stashed_xsave, xbuf)) + printf("[OK]\tTiledata was correctly written to ptracee.\n"); + else + printf("[FAIL]\tTiledata was not correctly written to ptracee.\n"); +} + +static void test_ptrace(void) +{ + pid_t child; + int status; + + child = fork(); + if (child < 0) { + err(1, "fork"); + } else if (!child) { + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL)) + err(1, "PTRACE_TRACEME"); + + ptracee_firstuse_tiledata(); + + raise(SIGTRAP); + _exit(0); + } + + do { + wait(&status); + } while (WSTOPSIG(status) != SIGTRAP); + + ptracer_inject_tiledata(child); + + ptrace(PTRACE_DETACH, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "ptrace test"); +} + int main(void) { /* Check hardware availability at first */ @@ -846,6 +946,8 @@ int main(void) ctxtswtest_config.num_threads = 5; test_context_switch(); + test_ptrace(); + clearhandler(SIGILL); free_stashed_xsave(); -- cgit From f1b80a3878b2d76ced46a275fdfd7fb80b4f083b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 14 Mar 2023 17:50:10 +0200 Subject: thermal: core: Restore behavior regarding invalid trip points Commit 7c3d5c20dc16 ("thermal/core: Add a generic thermal_zone_get_trip() function") stopped marking trip points with a zero temperature as disabled, behavior that was originally introduced in commit 81ad4276b505 ("Thermal: Ignore invalid trip points"). When using the mlxsw driver we see that when such trip points are not disabled, the thermal subsystem repeatedly tries to set the state of the associated cooling devices to the maximum state. Address this by restoring the original behavior and mark trip points with a zero temperature as disabled. Fixes: 7c3d5c20dc16 ("thermal/core: Add a generic thermal_zone_get_trip() function") Signed-off-by: Ido Schimmel Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 55679fd86505..b50931f84aaa 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1309,7 +1309,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t struct thermal_trip trip; result = thermal_zone_get_trip(tz, count, &trip); - if (result) + if (result || !trip.temperature) set_bit(count, &tz->trips_disabled); } -- cgit From 5f24a8725fe7bc2c6adf7ce00dd3e818387d8995 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 22 Mar 2023 15:01:34 -0400 Subject: SUNRPC: Fix a crash in gss_krb5_checksum() Anna says: > KASAN reports [...] a slab-out-of-bounds in gss_krb5_checksum(), > and it can cause my client to panic when running cthon basic > tests with krb5p. > Running faddr2line gives me: > > gss_krb5_checksum+0x4b6/0x630: > ahash_request_free at > /home/anna/Programs/linux-nfs.git/./include/crypto/hash.h:619 > (inlined by) gss_krb5_checksum at > /home/anna/Programs/linux-nfs.git/net/sunrpc/auth_gss/gss_krb5_crypto.c:358 My diagnosis is that the memcpy() at the end of gss_krb5_checksum() reads past the end of the buffer containing the checksum data because the callers have ignored gss_krb5_checksum()'s API contract: * Caller provides the truncation length of the output token (h) in * cksumout.len. Instead they provide the fixed length of the hmac buffer. This length happens to be larger than the value returned by crypto_ahash_digestsize(). Change these errant callers to work like krb5_etm_{en,de}crypt(). As a defensive measure, bound the length of the byte copy at the end of gss_krb5_checksum(). Kunit sez: Testing complete. Ran 68 tests: passed: 68 Elapsed time: 81.680s total, 5.875s configuring, 75.610s building, 0.103s running Reported-by: Anna Schumaker Fixes: 8270dbfcebea ("SUNRPC: Obscure Kerberos integrity keys") Signed-off-by: Chuck Lever --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 6c7c52eeed4f..212c5d57465a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -353,7 +353,9 @@ gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen, err = crypto_ahash_final(req); if (err) goto out_free_ahash; - memcpy(cksumout->data, checksumdata, cksumout->len); + + memcpy(cksumout->data, checksumdata, + min_t(int, cksumout->len, crypto_ahash_digestsize(tfm))); out_free_ahash: ahash_request_free(req); @@ -809,8 +811,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; buf->len += GSS_KRB5_TOK_HDR_LEN; - /* Do the HMAC */ - hmac.len = GSS_KRB5_MAX_CKSUM_LEN; + hmac.len = kctx->gk5e->cksumlength; hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len; /* @@ -873,8 +874,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, if (ret) goto out_err; - /* Calculate our hmac over the plaintext data */ - our_hmac_obj.len = sizeof(our_hmac); + our_hmac_obj.len = kctx->gk5e->cksumlength; our_hmac_obj.data = our_hmac; ret = gss_krb5_checksum(ahash, NULL, 0, &subbuf, 0, &our_hmac_obj); if (ret) -- cgit From 7a891d4b62d62566323676cb0e922ded4f37afe1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 Mar 2023 00:01:21 +0900 Subject: ksmbd: fix wrong signingkey creation when encryption is AES256 MacOS and Win11 support AES256 encrytion and it is included in the cipher array of encryption context. Especially on macOS, The most preferred cipher is AES256. Connecting to ksmbd fails on newer MacOS clients that support AES256 encryption. MacOS send disconnect request after receiving final session setup response from ksmbd. Because final session setup is signed with signing key was generated incorrectly. For signging key, 'L' value should be initialized to 128 if key size is 16bytes. Cc: stable@vger.kernel.org Reported-by: Miao Lihua <441884205@qq.com> Tested-by: Miao Lihua <441884205@qq.com> Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/auth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 6e61b5bc7d86..cead696b656a 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -727,8 +727,9 @@ static int generate_key(struct ksmbd_conn *conn, struct ksmbd_session *sess, goto smb3signkey_ret; } - if (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || - conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM) + if (key_size == SMB3_ENC_DEC_KEY_SIZE && + (conn->cipher_type == SMB2_ENCRYPTION_AES256_CCM || + conn->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L256, 4); else rc = crypto_shash_update(CRYPTO_HMACSHA256(ctx), L128, 4); -- cgit From 728f14c72b71a19623df329c1c7c9d1452e56f1e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 Mar 2023 00:02:30 +0900 Subject: ksmbd: set FILE_NAMED_STREAMS attribute in FS_ATTRIBUTE_INFORMATION If vfs objects = streams_xattr in ksmbd.conf FILE_NAMED_STREAMS should be set to Attributes in FS_ATTRIBUTE_INFORMATION. MacOS client show "Format: SMB (Unknown)" on faked NTFS and no streams support. Cc: stable@vger.kernel.org Reported-by: Miao Lihua <441884205@qq.com> Tested-by: Miao Lihua <441884205@qq.com> Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 0685c1c77b9f..bc64d36c4dcf 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -4934,6 +4934,10 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps); + if (test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_STREAMS)) + info->Attributes |= cpu_to_le32(FILE_NAMED_STREAMS); + info->MaxPathNameComponentLength = cpu_to_le32(stfs.f_namelen); len = smbConvertToUTF16((__le16 *)info->FileSystemName, "NTFS", PATH_MAX, conn->local_nls, 0); -- cgit From 2d74ec97131b1179a373b6d521f195c84e894eb6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 5 Mar 2023 21:04:00 +0900 Subject: ksmbd: add low bound validation to FSCTL_SET_ZERO_DATA Smatch static checker warning: fs/ksmbd/smb2pdu.c:7759 smb2_ioctl() warn: no lower bound on 'off' Fix unexpected result that could caused from negative off and bfz. Fixes: b5e5f9dfc915 ("ksmbd: check invalid FileOffset and BeyondFinalZero in FSCTL_ZERO_DATA") Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Reviewed-by: Sergey Senozhatsky Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index bc64d36c4dcf..f09afbdde58a 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7755,7 +7755,7 @@ int smb2_ioctl(struct ksmbd_work *work) off = le64_to_cpu(zero_data->FileOffset); bfz = le64_to_cpu(zero_data->BeyondFinalZero); - if (off > bfz) { + if (off < 0 || bfz < 0 || off > bfz) { ret = -EINVAL; goto out; } -- cgit From 342edb60dcda7a409430359b0cac2864bb9dfe44 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 7 Mar 2023 21:56:07 +0900 Subject: ksmbd: add low bound validation to FSCTL_QUERY_ALLOCATED_RANGES Smatch static checker warning: fs/ksmbd/vfs.c:1040 ksmbd_vfs_fqar_lseek() warn: no lower bound on 'length' fs/ksmbd/vfs.c:1041 ksmbd_vfs_fqar_lseek() warn: no lower bound on 'start' Fix unexpected result that could caused from negative start and length. Fixes: f44158485826 ("cifsd: add file operations") Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Reviewed-by: Sergey Senozhatsky Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index f09afbdde58a..cb779d217234 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7448,13 +7448,16 @@ static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id, if (in_count == 0) return -EINVAL; + start = le64_to_cpu(qar_req->file_offset); + length = le64_to_cpu(qar_req->length); + + if (start < 0 || length < 0) + return -EINVAL; + fp = ksmbd_lookup_fd_fast(work, id); if (!fp) return -ENOENT; - start = le64_to_cpu(qar_req->file_offset); - length = le64_to_cpu(qar_req->length); - ret = ksmbd_vfs_fqar_lseek(fp, start, length, qar_rsp, in_count, out_count); if (ret && ret != -E2BIG) -- cgit From 2624b445544ffc1472ccabfb6ec867c199d4c95c Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 2 Mar 2023 21:58:04 +0800 Subject: ksmbd: fix possible refcount leak in smb2_open() Reference count of acls will leak when memory allocation fails. Fix this by adding the missing posix_acl_release(). Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Signed-off-by: ChenXiaoSong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index cb779d217234..97c9d1b5bcc0 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2977,8 +2977,11 @@ int smb2_open(struct ksmbd_work *work) sizeof(struct smb_acl) + sizeof(struct smb_ace) * ace_num * 2, GFP_KERNEL); - if (!pntsd) + if (!pntsd) { + posix_acl_release(fattr.cf_acls); + posix_acl_release(fattr.cf_dacls); goto err_out; + } rc = build_sec_desc(idmap, pntsd, NULL, 0, -- cgit From be6f42fad5f5fd1fea9d562df82c38ad6ed3bfe9 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 21 Mar 2023 15:25:34 +0900 Subject: ksmbd: don't terminate inactive sessions after a few seconds Steve reported that inactive sessions are terminated after a few seconds. ksmbd terminate when receiving -EAGAIN error from kernel_recvmsg(). -EAGAIN means there is no data available in timeout. So ksmbd should keep connection with unlimited retries instead of terminating inactive sessions. Cc: stable@vger.kernel.org Reported-by: Steve French Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 4 ++-- fs/ksmbd/connection.h | 3 ++- fs/ksmbd/transport_rdma.c | 2 +- fs/ksmbd/transport_tcp.c | 35 +++++++++++++++++++++++------------ 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 5b10b03800c1..5d914715605f 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -298,7 +298,7 @@ int ksmbd_conn_handler_loop(void *p) kvfree(conn->request_buf); conn->request_buf = NULL; - size = t->ops->read(t, hdr_buf, sizeof(hdr_buf)); + size = t->ops->read(t, hdr_buf, sizeof(hdr_buf), -1); if (size != sizeof(hdr_buf)) break; @@ -344,7 +344,7 @@ int ksmbd_conn_handler_loop(void *p) * We already read 4 bytes to find out PDU size, now * read in PDU */ - size = t->ops->read(t, conn->request_buf + 4, pdu_size); + size = t->ops->read(t, conn->request_buf + 4, pdu_size, 2); if (size < 0) { pr_err("sock_read failed: %d\n", size); break; diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h index 3643354a3fa7..0e3a848defaf 100644 --- a/fs/ksmbd/connection.h +++ b/fs/ksmbd/connection.h @@ -114,7 +114,8 @@ struct ksmbd_transport_ops { int (*prepare)(struct ksmbd_transport *t); void (*disconnect)(struct ksmbd_transport *t); void (*shutdown)(struct ksmbd_transport *t); - int (*read)(struct ksmbd_transport *t, char *buf, unsigned int size); + int (*read)(struct ksmbd_transport *t, char *buf, + unsigned int size, int max_retries); int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov, int size, bool need_invalidate_rkey, unsigned int remote_key); diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 096eda9ef873..c06efc020bd9 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -670,7 +670,7 @@ static int smb_direct_post_recv(struct smb_direct_transport *t, } static int smb_direct_read(struct ksmbd_transport *t, char *buf, - unsigned int size) + unsigned int size, int unused) { struct smb_direct_recvmsg *recvmsg; struct smb_direct_data_transfer *data_transfer; diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 603893fd87f5..20e85e2701f2 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -291,16 +291,18 @@ static int ksmbd_tcp_run_kthread(struct interface *iface) /** * ksmbd_tcp_readv() - read data from socket in given iovec - * @t: TCP transport instance - * @iov_orig: base IO vector - * @nr_segs: number of segments in base iov - * @to_read: number of bytes to read from socket + * @t: TCP transport instance + * @iov_orig: base IO vector + * @nr_segs: number of segments in base iov + * @to_read: number of bytes to read from socket + * @max_retries: maximum retry count * * Return: on success return number of bytes read from socket, * otherwise return error number */ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, - unsigned int nr_segs, unsigned int to_read) + unsigned int nr_segs, unsigned int to_read, + int max_retries) { int length = 0; int total_read; @@ -308,7 +310,6 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; - int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -335,14 +336,23 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if ((length == -ERESTARTSYS || length == -EAGAIN) && - max_retry) { + } else if (length == -ERESTARTSYS || length == -EAGAIN) { + /* + * If max_retries is negative, Allow unlimited + * retries to keep connection with inactive sessions. + */ + if (max_retries == 0) { + total_read = length; + break; + } else if (max_retries > 0) { + max_retries--; + } + usleep_range(1000, 2000); length = 0; - max_retry--; continue; } else if (length <= 0) { - total_read = -EAGAIN; + total_read = length; break; } } @@ -358,14 +368,15 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, * Return: on success return number of bytes read from socket, * otherwise return error number */ -static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, unsigned int to_read) +static int ksmbd_tcp_read(struct ksmbd_transport *t, char *buf, + unsigned int to_read, int max_retries) { struct kvec iov; iov.iov_base = buf; iov.iov_len = to_read; - return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read); + return ksmbd_tcp_readv(TCP_TRANS(t), &iov, 1, to_read, max_retries); } static int ksmbd_tcp_writev(struct ksmbd_transport *t, struct kvec *iov, -- cgit From b53e8cfec30b93c120623232ba27c041b1ef8f1a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 21 Mar 2023 15:36:40 +0900 Subject: ksmbd: return STATUS_NOT_SUPPORTED on unsupported smb2.0 dialect ksmbd returned "Input/output error" when mounting with vers=2.0 to ksmbd. It should return STATUS_NOT_SUPPORTED on unsupported smb2.0 dialect. Cc: stable@vger.kernel.org Reported-by: Steve French Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index fa2b54df6ee6..079c9e76818d 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -434,7 +434,7 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, const char *longname, static int __smb2_negotiate(struct ksmbd_conn *conn) { - return (conn->dialect >= SMB21_PROT_ID && + return (conn->dialect >= SMB20_PROT_ID && conn->dialect <= SMB311_PROT_ID); } @@ -465,7 +465,7 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) } } - if (command == SMB2_NEGOTIATE_HE && __smb2_negotiate(conn)) { + if (command == SMB2_NEGOTIATE_HE) { ret = smb2_handle_negotiate(work); init_smb2_neg_rsp(work); return ret; -- cgit From 3c44a431d62bf4a20d7b901f98266ae3f4676d48 Mon Sep 17 00:00:00 2001 From: Zhengping Jiang Date: Tue, 21 Feb 2023 16:17:56 -0800 Subject: Bluetooth: hci_sync: Resume adv with no RPA when active scan The address resolution should be disabled during the active scan, so all the advertisements can reach the host. The advertising has to be paused before disabling the address resolution, because the advertising will prevent any changes to the resolving list and the address resolution status. Skipping this will cause the hci error and the discovery failure. According to the bluetooth specification: "7.8.44 LE Set Address Resolution Enable command This command shall not be used when: - Advertising (other than periodic advertising) is enabled, - Scanning is enabled, or - an HCI_LE_Create_Connection, HCI_LE_Extended_Create_Connection, or HCI_LE_Periodic_Advertising_Create_Sync command is outstanding." If the host is using RPA, the controller needs to generate RPA for the advertising, so the advertising must remain paused during the active scan. If the host is not using RPA, the advertising can be resumed after disabling the address resolution. Fixes: 9afc675edeeb ("Bluetooth: hci_sync: allow advertise when scan without RPA") Signed-off-by: Zhengping Jiang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 64 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 117eedb6f709..7e152e912e8c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2367,6 +2367,45 @@ static int hci_resume_advertising_sync(struct hci_dev *hdev) return err; } +static int hci_pause_addr_resolution(struct hci_dev *hdev) +{ + int err; + + if (!use_ll_privacy(hdev)) + return 0; + + if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) + return 0; + + /* Cannot disable addr resolution if scanning is enabled or + * when initiating an LE connection. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN) || + hci_lookup_le_connect(hdev)) { + bt_dev_err(hdev, "Command not allowed when scan/LE connect"); + return -EPERM; + } + + /* Cannot disable addr resolution if advertising is enabled. */ + err = hci_pause_advertising_sync(hdev); + if (err) { + bt_dev_err(hdev, "Pause advertising failed: %d", err); + return err; + } + + err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); + if (err) + bt_dev_err(hdev, "Unable to disable Address Resolution: %d", + err); + + /* Return if address resolution is disabled and RPA is not used. */ + if (!err && scan_use_rpa(hdev)) + return err; + + hci_resume_advertising_sync(hdev); + return err; +} + struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev, bool extended, struct sock *sk) { @@ -2402,7 +2441,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) u8 filter_policy; int err; - /* Pause advertising if resolving list can be used as controllers are + /* Pause advertising if resolving list can be used as controllers * cannot accept resolving list modifications while advertising. */ if (use_ll_privacy(hdev)) { @@ -5394,27 +5433,12 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) cancel_interleave_scan(hdev); - /* Pause advertising since active scanning disables address resolution - * which advertising depend on in order to generate its RPAs. - */ - if (use_ll_privacy(hdev) && hci_dev_test_flag(hdev, HCI_PRIVACY)) { - err = hci_pause_advertising_sync(hdev); - if (err) { - bt_dev_err(hdev, "pause advertising failed: %d", err); - goto failed; - } - } - - /* Disable address resolution while doing active scanning since the - * accept list shall not be used and all reports shall reach the host - * anyway. + /* Pause address resolution for active scan and stop advertising if + * privacy is enabled. */ - err = hci_le_set_addr_resolution_enable_sync(hdev, 0x00); - if (err) { - bt_dev_err(hdev, "Unable to disable Address Resolution: %d", - err); + err = hci_pause_addr_resolution(hdev); + if (err) goto failed; - } /* All active scans will be done with either a resolvable private * address (when privacy feature has been enabled) or non-resolvable -- cgit From 876e78104f23ce9267822757a63562a609b126c3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 24 Feb 2023 15:43:31 -0800 Subject: Bluetooth: hci_core: Detect if an ACL packet is in fact an ISO packet Because some transports don't have a dedicated type for ISO packets (see 14202eff214e1e941fefa0366d4c3bc4b1a0d500) they may use ACL type when in fact they are ISO packets. In the past this was left for the driver to detect such thing but it creates a problem when using the likes of btproxy when used by a VM as the host would not be aware of the connection the guest is doing it won't be able to detect such behavior, so this make bt_recv_frame detect when it happens as it is the common interface to all drivers including guest VMs. Fixes: 14202eff214e ("Bluetooth: btusb: Detect if an ACL packet is in fact an ISO packet") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b65c3aabcd53..334e308451f5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2871,10 +2871,25 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) return -ENXIO; } - if (hci_skb_pkt_type(skb) != HCI_EVENT_PKT && - hci_skb_pkt_type(skb) != HCI_ACLDATA_PKT && - hci_skb_pkt_type(skb) != HCI_SCODATA_PKT && - hci_skb_pkt_type(skb) != HCI_ISODATA_PKT) { + switch (hci_skb_pkt_type(skb)) { + case HCI_EVENT_PKT: + break; + case HCI_ACLDATA_PKT: + /* Detect if ISO packet has been sent as ACL */ + if (hci_conn_num(hdev, ISO_LINK)) { + __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); + __u8 type; + + type = hci_conn_lookup_type(hdev, hci_handle(handle)); + if (type == ISO_LINK) + hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; + } + break; + case HCI_SCODATA_PKT: + break; + case HCI_ISODATA_PKT: + break; + default: kfree_skb(skb); return -EINVAL; } -- cgit From efe375b716c1c1c9b52a816f5b933a95421020a2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 24 Feb 2023 15:54:31 -0800 Subject: Bluetooth: btusb: Remove detection of ISO packets over bulk This removes the code introduced by 14202eff214e1e941fefa0366d4c3bc4b1a0d500 as hci_recv_frame is now able to detect ACL packets that are in fact ISO packets. Fixes: 14202eff214e ("Bluetooth: btusb: Detect if an ACL packet is in fact an ISO packet") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 18bc94718711..5c536151ef83 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1050,21 +1050,11 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count) hci_skb_expect(skb) -= len; if (skb->len == HCI_ACL_HDR_SIZE) { - __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); __le16 dlen = hci_acl_hdr(skb)->dlen; - __u8 type; /* Complete ACL header */ hci_skb_expect(skb) = __le16_to_cpu(dlen); - /* Detect if ISO packet has been sent over bulk */ - if (hci_conn_num(data->hdev, ISO_LINK)) { - type = hci_conn_lookup_type(data->hdev, - hci_handle(handle)); - if (type == ISO_LINK) - hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; - } - if (skb_tailroom(skb) < hci_skb_expect(skb)) { kfree_skb(skb); skb = NULL; -- cgit From 2f10e40a948e8a2abe7f983df3959a333ca8955f Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 20 Feb 2023 19:38:24 +0000 Subject: Bluetooth: ISO: fix timestamped HCI ISO data packet parsing Use correct HCI ISO data packet header struct when the packet has timestamp. The timestamp, when present, goes before the other fields (Core v5.3 4E 5.4.5), so the structs are not compatible. Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 24444b502e58..8d136a730163 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1620,7 +1620,6 @@ static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason) void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { struct iso_conn *conn = hcon->iso_data; - struct hci_iso_data_hdr *hdr; __u16 pb, ts, len; if (!conn) @@ -1642,6 +1641,8 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) } if (ts) { + struct hci_iso_ts_data_hdr *hdr; + /* TODO: add timestamp to the packet? */ hdr = skb_pull_data(skb, HCI_ISO_TS_DATA_HDR_SIZE); if (!hdr) { @@ -1649,15 +1650,19 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) goto drop; } + len = __le16_to_cpu(hdr->slen); } else { + struct hci_iso_data_hdr *hdr; + hdr = skb_pull_data(skb, HCI_ISO_DATA_HDR_SIZE); if (!hdr) { BT_ERR("Frame is too short (len %d)", skb->len); goto drop; } + + len = __le16_to_cpu(hdr->slen); } - len = __le16_to_cpu(hdr->slen); flags = hci_iso_data_flags(len); len = hci_iso_data_len(len); -- cgit From 294d749b5df5a22d17989833fb1a0a2cd1dfd243 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Tue, 28 Feb 2023 16:31:54 +0530 Subject: Bluetooth: btintel: Iterate only bluetooth device ACPI entries Current flow interates over entire ACPI table entries looking for Bluetooth Per Platform Antenna Gain(PPAG) entry. This patch iterates over ACPI entries relvant to Bluetooth device only. Fixes: c585a92b2f9c ("Bluetooth: btintel: Set Per Platform Antenna Gain(PPAG)") Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 44 ++++++++++++++++++++++++---------------- drivers/bluetooth/btintel.h | 7 ------- include/net/bluetooth/hci_core.h | 1 + 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index bede8b005594..e8d4b59e89c5 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -26,7 +26,14 @@ #define ECDSA_HEADER_LEN 320 #define BTINTEL_PPAG_NAME "PPAG" -#define BTINTEL_PPAG_PREFIX "\\_SB_.PCI0.XHCI.RHUB" + +/* structure to store the PPAG data read from ACPI table */ +struct btintel_ppag { + u32 domain; + u32 mode; + acpi_status status; + struct hci_dev *hdev; +}; #define CMD_WRITE_BOOT_PARAMS 0xfc0e struct cmd_write_boot_params { @@ -1295,17 +1302,16 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data status = acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); if (ACPI_FAILURE(status)) { - bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status)); + bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status)); return status; } - if (strncmp(BTINTEL_PPAG_PREFIX, string.pointer, - strlen(BTINTEL_PPAG_PREFIX))) { + len = strlen(string.pointer); + if (len < strlen(BTINTEL_PPAG_NAME)) { kfree(string.pointer); return AE_OK; } - len = strlen(string.pointer); if (strncmp((char *)string.pointer + len - 4, BTINTEL_PPAG_NAME, 4)) { kfree(string.pointer); return AE_OK; @@ -1314,7 +1320,8 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data status = acpi_evaluate_object(handle, NULL, NULL, &buffer); if (ACPI_FAILURE(status)) { - bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status)); + ppag->status = status; + bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status)); return status; } @@ -1323,8 +1330,9 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data if (p->type != ACPI_TYPE_PACKAGE || p->package.count != 2) { kfree(buffer.pointer); - bt_dev_warn(hdev, "Invalid object type: %d or package count: %d", + bt_dev_warn(hdev, "PPAG-BT: Invalid object type: %d or package count: %d", p->type, p->package.count); + ppag->status = AE_ERROR; return AE_ERROR; } @@ -1335,6 +1343,7 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data ppag->domain = (u32)p->package.elements[0].integer.value; ppag->mode = (u32)p->package.elements[1].integer.value; + ppag->status = AE_OK; kfree(buffer.pointer); return AE_CTRL_TERMINATE; } @@ -2314,12 +2323,11 @@ error: static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver) { - acpi_status status; struct btintel_ppag ppag; struct sk_buff *skb; struct btintel_loc_aware_reg ppag_cmd; - /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ + /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ switch (ver->cnvr_top & 0xFFF) { case 0x504: /* Hrp2 */ case 0x202: /* Jfp2 */ @@ -2330,26 +2338,26 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver memset(&ppag, 0, sizeof(ppag)); ppag.hdev = hdev; - status = acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, NULL, - btintel_ppag_callback, &ppag, NULL); + ppag.status = AE_NOT_FOUND; + acpi_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_HANDLE(GET_HCIDEV_DEV(hdev)), + 1, NULL, btintel_ppag_callback, &ppag, NULL); - if (ACPI_FAILURE(status)) { - /* Do not log warning message if ACPI entry is not found */ - if (status == AE_NOT_FOUND) + if (ACPI_FAILURE(ppag.status)) { + if (ppag.status == AE_NOT_FOUND) { + bt_dev_dbg(hdev, "PPAG-BT: ACPI entry not found"); return; - bt_dev_warn(hdev, "PPAG: ACPI Failure: %s", acpi_format_exception(status)); + } return; } if (ppag.domain != 0x12) { - bt_dev_warn(hdev, "PPAG-BT Domain disabled"); + bt_dev_warn(hdev, "PPAG-BT: domain is not bluetooth"); return; } /* PPAG mode, BIT0 = 0 Disabled, BIT0 = 1 Enabled */ if (!(ppag.mode & BIT(0))) { - bt_dev_dbg(hdev, "PPAG disabled"); + bt_dev_dbg(hdev, "PPAG-BT: disabled"); return; } diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index 8e7da877efae..8fdb65b66315 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -137,13 +137,6 @@ struct intel_offload_use_cases { __u8 preset[8]; } __packed; -/* structure to store the PPAG data read from ACPI table */ -struct btintel_ppag { - u32 domain; - u32 mode; - struct hci_dev *hdev; -}; - struct btintel_loc_aware_reg { __le32 mcc; __le32 sel; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7254edfba4c9..6ed9b4d546a7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1613,6 +1613,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn); void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) +#define GET_HCIDEV_DEV(hdev) ((hdev)->dev.parent) /* ----- LMP capabilities ----- */ #define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) -- cgit From 1c66bee492a5fe00ae3fe890bb693bfc99f994c6 Mon Sep 17 00:00:00 2001 From: Min Li Date: Sat, 4 Mar 2023 21:50:35 +0800 Subject: Bluetooth: Fix race condition in hci_cmd_sync_clear There is a potential race condition in hci_cmd_sync_work and hci_cmd_sync_clear, and could lead to use-after-free. For instance, hci_cmd_sync_work is added to the 'req_workqueue' after cancel_work_sync The entry of 'cmd_sync_work_list' may be freed in hci_cmd_sync_clear, and causing kernel panic when it is used in 'hci_cmd_sync_work'. Here's the call trace: dump_stack_lvl+0x49/0x63 print_report.cold+0x5e/0x5d3 ? hci_cmd_sync_work+0x282/0x320 kasan_report+0xaa/0x120 ? hci_cmd_sync_work+0x282/0x320 __asan_report_load8_noabort+0x14/0x20 hci_cmd_sync_work+0x282/0x320 process_one_work+0x77b/0x11c0 ? _raw_spin_lock_irq+0x8e/0xf0 worker_thread+0x544/0x1180 ? poll_idle+0x1e0/0x1e0 kthread+0x285/0x320 ? process_one_work+0x11c0/0x11c0 ? kthread_complete_and_exit+0x30/0x30 ret_from_fork+0x22/0x30 Allocated by task 266: kasan_save_stack+0x26/0x50 __kasan_kmalloc+0xae/0xe0 kmem_cache_alloc_trace+0x191/0x350 hci_cmd_sync_queue+0x97/0x2b0 hci_update_passive_scan+0x176/0x1d0 le_conn_complete_evt+0x1b5/0x1a00 hci_le_conn_complete_evt+0x234/0x340 hci_le_meta_evt+0x231/0x4e0 hci_event_packet+0x4c5/0xf00 hci_rx_work+0x37d/0x880 process_one_work+0x77b/0x11c0 worker_thread+0x544/0x1180 kthread+0x285/0x320 ret_from_fork+0x22/0x30 Freed by task 269: kasan_save_stack+0x26/0x50 kasan_set_track+0x25/0x40 kasan_set_free_info+0x24/0x40 ____kasan_slab_free+0x176/0x1c0 __kasan_slab_free+0x12/0x20 slab_free_freelist_hook+0x95/0x1a0 kfree+0xba/0x2f0 hci_cmd_sync_clear+0x14c/0x210 hci_unregister_dev+0xff/0x440 vhci_release+0x7b/0xf0 __fput+0x1f3/0x970 ____fput+0xe/0x20 task_work_run+0xd4/0x160 do_exit+0x8b0/0x22a0 do_group_exit+0xba/0x2a0 get_signal+0x1e4a/0x25b0 arch_do_signal_or_restart+0x93/0x1f80 exit_to_user_mode_prepare+0xf5/0x1a0 syscall_exit_to_user_mode+0x26/0x50 ret_from_fork+0x15/0x30 Fixes: 6a98e3836fa2 ("Bluetooth: Add helper for serialized HCI command execution") Cc: stable@vger.kernel.org Signed-off-by: Min Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 7e152e912e8c..5b8dc8fb2e27 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -643,6 +643,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) cancel_work_sync(&hdev->cmd_sync_work); cancel_work_sync(&hdev->reenable_adv_work); + mutex_lock(&hdev->cmd_sync_work_lock); list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { if (entry->destroy) entry->destroy(hdev, entry->data, -ECANCELED); @@ -650,6 +651,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) list_del(&entry->list); kfree(entry); } + mutex_unlock(&hdev->cmd_sync_work_lock); } void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err) -- cgit From 52dd5e964a55c98c1b0bcf5fc737a5ddd00e7d4d Mon Sep 17 00:00:00 2001 From: Brian Gix Date: Mon, 6 Mar 2023 14:32:21 -0800 Subject: Bluetooth: Remove "Power-on" check from Mesh feature The Bluetooth mesh experimental feature enable was requiring the controller to be powered off in order for the Enable to work. Mesh is supposed to be enablable regardless of the controller state, and created an unintended requirement that the mesh daemon be started before the classic bluetoothd daemon. Fixes: af6bcc1921ff ("Bluetooth: Add experimental wrapper for MGMT based mesh") Signed-off-by: Brian Gix Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7add66f30e4d..39589f864ea7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4639,12 +4639,6 @@ static int set_mgmt_mesh_func(struct sock *sk, struct hci_dev *hdev, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_INVALID_INDEX); - /* Changes can only be made when controller is powered down */ - if (hdev_is_powered(hdev)) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_EXP_FEATURE, - MGMT_STATUS_REJECTED); - /* Parameters are limited to a single octet */ if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) return mgmt_cmd_status(sk, hdev->id, -- cgit From c79493c3ccf06a3aeb72017a96ca3dfd166bc16b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 22 Mar 2023 01:28:31 +0200 Subject: net: enetc: fix aggregate RMON counters not showing the ranges When running "ethtool -S eno0 --groups rmon" without an explicit "--src emac|pmac" argument, the kernel will not report rx-rmon-etherStatsPkts64to64Octets, rx-rmon-etherStatsPkts65to127Octets, etc. This is because on ETHTOOL_MAC_STATS_SRC_AGGREGATE, we do not populate the "ranges" argument. ocelot_port_get_rmon_stats() does things differently and things work there. I had forgotten to make sure that the code is structured the same way in both drivers, so do that now. Fixes: cf52bd238b75 ("net: enetc: add support for MAC Merge statistics counters") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230321232831.1200905-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_ethtool.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index bca68edfbe9c..da9d4b310fcd 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -370,8 +370,7 @@ static const struct ethtool_rmon_hist_range enetc_rmon_ranges[] = { }; static void enetc_rmon_stats(struct enetc_hw *hw, int mac, - struct ethtool_rmon_stats *s, - const struct ethtool_rmon_hist_range **ranges) + struct ethtool_rmon_stats *s) { s->undersize_pkts = enetc_port_rd(hw, ENETC_PM_RUND(mac)); s->oversize_pkts = enetc_port_rd(hw, ENETC_PM_ROVR(mac)); @@ -393,8 +392,6 @@ static void enetc_rmon_stats(struct enetc_hw *hw, int mac, s->hist_tx[4] = enetc_port_rd(hw, ENETC_PM_T1023(mac)); s->hist_tx[5] = enetc_port_rd(hw, ENETC_PM_T1522(mac)); s->hist_tx[6] = enetc_port_rd(hw, ENETC_PM_T1523X(mac)); - - *ranges = enetc_rmon_ranges; } static void enetc_get_eth_mac_stats(struct net_device *ndev, @@ -447,13 +444,15 @@ static void enetc_get_rmon_stats(struct net_device *ndev, struct enetc_hw *hw = &priv->si->hw; struct enetc_si *si = priv->si; + *ranges = enetc_rmon_ranges; + switch (rmon_stats->src) { case ETHTOOL_MAC_STATS_SRC_EMAC: - enetc_rmon_stats(hw, 0, rmon_stats, ranges); + enetc_rmon_stats(hw, 0, rmon_stats); break; case ETHTOOL_MAC_STATS_SRC_PMAC: if (si->hw_features & ENETC_SI_F_QBU) - enetc_rmon_stats(hw, 1, rmon_stats, ranges); + enetc_rmon_stats(hw, 1, rmon_stats); break; case ETHTOOL_MAC_STATS_SRC_AGGREGATE: ethtool_aggregate_rmon_stats(ndev, rmon_stats); -- cgit From 758d29fb3a8b3c756b4e4e0aa9b32ca8cfaf3feb Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Sun, 19 Mar 2023 19:37:58 +0000 Subject: tools: ynl: Fix genlmsg header encoding formats The pack strings use 'b' signed char for cmd and version but struct genlmsghdr defines them as unsigned char. Use 'B' instead. Fixes: 4e4480e89c47 ("tools: ynl: move the cli and netlink code around") Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20230319193803.97453-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 90764a83c646..32536e1f9064 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -200,7 +200,7 @@ def _genl_msg(nl_type, nl_flags, genl_cmd, genl_version, seq=None): if seq is None: seq = random.randint(1, 1024) nlmsg = struct.pack("HHII", nl_type, nl_flags, seq, 0) - genlmsg = struct.pack("bbH", genl_cmd, genl_version, 0) + genlmsg = struct.pack("BBH", genl_cmd, genl_version, 0) return nlmsg + genlmsg @@ -264,7 +264,7 @@ class GenlMsg: self.hdr = nl_msg.raw[0:4] self.raw = nl_msg.raw[4:] - self.genl_cmd, self.genl_version, _ = struct.unpack("bbH", self.hdr) + self.genl_cmd, self.genl_version, _ = struct.unpack("BBH", self.hdr) self.raw_attrs = NlAttrs(self.raw) @@ -358,7 +358,7 @@ class YnlFamily(SpecFamily): raw >>= 1 i += 1 else: - value = enum['entries'][raw - i] + value = enum.entries_by_val[raw - i].name rsp[attr_spec['name']] = value def _decode(self, attrs, space): -- cgit From 2f4e429c846972c8405951a9ff7a82aceeca7461 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 20 Feb 2023 13:02:11 +0000 Subject: cifs: lock chan_lock outside match_session Coverity had rightly indicated a possible deadlock due to chan_lock being done inside match_session. All callers of match_* functions should pick up the necessary locks and call them. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Fixes: 724244cdb382 ("cifs: protect session channel fields with chan_lock") Signed-off-by: Steve French --- fs/cifs/connect.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 49b37594e991..f42cc7077312 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1721,7 +1721,7 @@ out_err: return ERR_PTR(rc); } -/* this function must be called with ses_lock held */ +/* this function must be called with ses_lock and chan_lock held */ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) { if (ctx->sectype != Unspecified && @@ -1732,12 +1732,8 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) * If an existing session is limited to less channels than * requested, it should not be reused */ - spin_lock(&ses->chan_lock); - if (ses->chan_max < ctx->max_channels) { - spin_unlock(&ses->chan_lock); + if (ses->chan_max < ctx->max_channels) return 0; - } - spin_unlock(&ses->chan_lock); switch (ses->sectype) { case Kerberos: @@ -1865,10 +1861,13 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) spin_unlock(&ses->ses_lock); continue; } + spin_lock(&ses->chan_lock); if (!match_session(ses, ctx)) { + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); continue; } + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); ++ses->ses_count; @@ -2693,6 +2692,7 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&tcp_srv->srv_lock); spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); spin_lock(&tcon->tc_lock); if (!match_server(tcp_srv, ctx, dfs_super_cmp) || !match_session(ses, ctx) || @@ -2705,6 +2705,7 @@ cifs_match_super(struct super_block *sb, void *data) rc = compare_mount_options(sb, mnt_data); out: spin_unlock(&tcon->tc_lock); + spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); spin_unlock(&tcp_srv->srv_lock); -- cgit From 68c3e4fc8628b1487c965aabb29207249657eb5f Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Tue, 21 Mar 2023 10:23:32 -0700 Subject: gve: Cache link_speed value from device The link speed is never changed for the uptime of a VM, and the current implementation sends an admin queue command for each call. Admin queue command invocations have nontrivial overhead (e.g., VM exits), which can be disruptive to users if triggered frequently. Our telemetry data shows that there are VMs that make frequent calls to this admin queue command. Caching the result of the original admin queue command would eliminate the need to send multiple admin queue commands on subsequent calls to retrieve link speed. Fixes: 7e074d5a76ca ("gve: Enable Link Speed Reporting in the driver.") Signed-off-by: Joshua Washington Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230321172332.91678-1-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index ce574d097e28..5f81470843b4 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -537,7 +537,10 @@ static int gve_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct gve_priv *priv = netdev_priv(netdev); - int err = gve_adminq_report_link_speed(priv); + int err = 0; + + if (priv->link_speed == 0) + err = gve_adminq_report_link_speed(priv); cmd->base.speed = priv->link_speed; return err; -- cgit From 8eac0095de355ee31e1b014f79f83d2cd62a2d04 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Tue, 21 Mar 2023 10:05:39 -0700 Subject: net: asix: fix modprobe "sysfs: cannot create duplicate filename" "modprobe asix ; rmmod asix ; modprobe asix" fails with: sysfs: cannot create duplicate filename \ '/devices/virtual/mdio_bus/usb-003:004' Issue was originally reported by Anton Lundin on 2022-06-22 (link below). Chrome OS team hit the same issue in Feb, 2023 when trying to find work arounds for other issues with AX88172 devices. The use of devm_mdiobus_register() with usbnet devices results in the MDIO data being associated with the USB device. When the asix driver is unloaded, the USB device continues to exist and the corresponding "mdiobus_unregister()" is NOT called until the USB device is unplugged or unauthorized. So the next "modprobe asix" will fail because the MDIO phy sysfs attributes still exist. The 'easy' (from a design PoV) fix is to use the non-devm variants of mdiobus_* functions and explicitly manage this use in the asix_bind and asix_unbind function calls. I've not explored trying to fix usbnet initialization so devm_* stuff will work. Fixes: e532a096be0e5 ("net: usb: asix: ax88772: add phylib support") Reported-by: Anton Lundin Link: https://lore.kernel.org/netdev/20220623063649.GD23685@pengutronix.de/T/ Tested-by: Eizan Miyamoto Signed-off-by: Grant Grundler Link: https://lore.kernel.org/r/20230321170539.732147-1-grundler@chromium.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 743cbf5d662c..f7cff58fe044 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -666,8 +666,9 @@ static int asix_resume(struct usb_interface *intf) static int ax88772_init_mdio(struct usbnet *dev) { struct asix_common_private *priv = dev->driver_priv; + int ret; - priv->mdio = devm_mdiobus_alloc(&dev->udev->dev); + priv->mdio = mdiobus_alloc(); if (!priv->mdio) return -ENOMEM; @@ -679,7 +680,20 @@ static int ax88772_init_mdio(struct usbnet *dev) snprintf(priv->mdio->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); - return devm_mdiobus_register(&dev->udev->dev, priv->mdio); + ret = mdiobus_register(priv->mdio); + if (ret) { + netdev_err(dev->net, "Could not register MDIO bus (err %d)\n", ret); + mdiobus_free(priv->mdio); + priv->mdio = NULL; + } + + return ret; +} + +static void ax88772_mdio_unregister(struct asix_common_private *priv) +{ + mdiobus_unregister(priv->mdio); + mdiobus_free(priv->mdio); } static int ax88772_init_phy(struct usbnet *dev) @@ -896,16 +910,23 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) ret = ax88772_init_mdio(dev); if (ret) - return ret; + goto mdio_err; ret = ax88772_phylink_setup(dev); if (ret) - return ret; + goto phylink_err; ret = ax88772_init_phy(dev); if (ret) - phylink_destroy(priv->phylink); + goto initphy_err; + return 0; + +initphy_err: + phylink_destroy(priv->phylink); +phylink_err: + ax88772_mdio_unregister(priv); +mdio_err: return ret; } @@ -926,6 +947,7 @@ static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf) phylink_disconnect_phy(priv->phylink); rtnl_unlock(); phylink_destroy(priv->phylink); + ax88772_mdio_unregister(priv); asix_rx_fixup_common_free(dev->driver_priv); } -- cgit From 8f058a6ef99f0b88a177b58cc46a44ff5112e40a Mon Sep 17 00:00:00 2001 From: Arınç ÜNAL Date: Mon, 20 Mar 2023 22:05:18 +0300 Subject: net: dsa: mt7530: move enabling disabling core clock to mt7530_pll_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the code that enables and disables TRGMII clocks and core clock. Move enabling and disabling core clock to mt7530_pll_setup() as it's supposed to be run there. Add 20 ms delay before enabling the core clock as seen on the U-Boot MediaTek ethernet driver. Change the comment for enabling and disabling TRGMII clocks as the code seems to affect both TXC and RXC. Tested rgmii and trgmii modes of port 6 and rgmii mode of port 5 on MCM MT7530 on MT7621AT Unielec U7621-06 and standalone MT7530 on MT7623NI Bananapi BPI-R2. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Link: https://source.denx.de/u-boot/u-boot/-/blob/29a48bf9ccba45a5e560bb564bbe76e42629325f/drivers/net/mtk_eth.c#L589 Tested-by: Arınç ÜNAL Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230320190520.124513-1-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c2d81b7a429d..d4a559007973 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -396,6 +396,9 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, /* Set up switch core clock for MT7530 */ static void mt7530_pll_setup(struct mt7530_priv *priv) { + /* Disable core clock */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); + /* Disable PLL */ core_write(priv, CORE_GSWPLL_GRP1, 0); @@ -409,6 +412,11 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) RG_GSWPLL_EN_PRE | RG_GSWPLL_POSDIV_200M(2) | RG_GSWPLL_FBKDIV_200M(32)); + + udelay(20); + + /* Enable core clock */ + core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } /* Setup TX circuit including relevant PAD and driving */ @@ -466,9 +474,8 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), TD_DM_DRVP(8) | TD_DM_DRVN(8)); - /* Disable MT7530 core and TRGMII Tx clocks */ - core_clear(priv, CORE_TRGMII_GSW_CLK_CG, - REG_GSWCK_EN | REG_TRGMIICK_EN); + /* Disable the MT7530 TRGMII clocks */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); /* Setup the MT7530 TRGMII Tx Clock */ core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1)); @@ -485,9 +492,8 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) | RG_LCDDS_PWDB | RG_LCDDS_ISO_EN); - /* Enable MT7530 core and TRGMII Tx clocks */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, - REG_GSWCK_EN | REG_TRGMIICK_EN); + /* Enable the MT7530 TRGMII clocks */ + core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); } else { for (i = 0 ; i < NUM_TRGMII_CTRL; i++) mt7530_rmw(priv, MT7530_TRGMII_RD(i), -- cgit From fdcc8ccd823740c18e803b886cec461bc0e64201 Mon Sep 17 00:00:00 2001 From: Arınç ÜNAL Date: Mon, 20 Mar 2023 22:05:19 +0300 Subject: net: dsa: mt7530: move lowering TRGMII driving to mt7530_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move lowering the TRGMII Tx clock driving to mt7530_setup(), after setting the core clock, as seen on the U-Boot MediaTek ethernet driver. Move the code which looks like it lowers the TRGMII Rx clock driving to after the TRGMII Tx clock driving is lowered. This is run after lowering the Tx clock driving on the U-Boot MediaTek ethernet driver as well. This way, the switch should consume less power regardless of port 6 being used. Update the comment explaining mt7530_pad_clk_setup(). Tested rgmii and trgmii modes of port 6 and rgmii mode of port 5 on MCM MT7530 on MT7621AT Unielec U7621-06 and standalone MT7530 on MT7623NI Bananapi BPI-R2. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Link: https://source.denx.de/u-boot/u-boot/-/blob/29a48bf9ccba45a5e560bb564bbe76e42629325f/drivers/net/mtk_eth.c#L682 Tested-by: Arınç ÜNAL Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230320190520.124513-2-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d4a559007973..8831bd409a40 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -419,12 +419,12 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } -/* Setup TX circuit including relevant PAD and driving */ +/* Setup port 6 interface mode and TRGMII TX circuit */ static int mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) { struct mt7530_priv *priv = ds->priv; - u32 ncpo1, ssc_delta, trgint, i, xtal; + u32 ncpo1, ssc_delta, trgint, xtal; xtal = mt7530_read(priv, MT7530_MHWTRAP) & HWTRAP_XTAL_MASK; @@ -469,11 +469,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) P6_INTF_MODE(trgint)); if (trgint) { - /* Lower Tx Driving for TRGMII path */ - for (i = 0 ; i < NUM_TRGMII_CTRL ; i++) - mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), - TD_DM_DRVP(8) | TD_DM_DRVN(8)); - /* Disable the MT7530 TRGMII clocks */ core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); @@ -494,10 +489,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) /* Enable the MT7530 TRGMII clocks */ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_TRGMIICK_EN); - } else { - for (i = 0 ; i < NUM_TRGMII_CTRL; i++) - mt7530_rmw(priv, MT7530_TRGMII_RD(i), - RD_TAP_MASK, RD_TAP(16)); } return 0; @@ -2207,6 +2198,15 @@ mt7530_setup(struct dsa_switch *ds) mt7530_pll_setup(priv); + /* Lower Tx driving for TRGMII path */ + for (i = 0; i < NUM_TRGMII_CTRL; i++) + mt7530_write(priv, MT7530_TRGMII_TD_ODT(i), + TD_DM_DRVP(8) | TD_DM_DRVN(8)); + + for (i = 0; i < NUM_TRGMII_CTRL; i++) + mt7530_rmw(priv, MT7530_TRGMII_RD(i), + RD_TAP_MASK, RD_TAP(16)); + /* Enable port 6 */ val = mt7530_read(priv, MT7530_MHWTRAP); val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS; -- cgit From 407b508bdd70b6848993843d96ed49ac4108fb52 Mon Sep 17 00:00:00 2001 From: Arınç ÜNAL Date: Mon, 20 Mar 2023 22:05:20 +0300 Subject: net: dsa: mt7530: move setting ssc_delta to PHY_INTERFACE_MODE_TRGMII case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move setting the ssc_delta variable to under the PHY_INTERFACE_MODE_TRGMII case as it's only needed when trgmii is used. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230320190520.124513-3-arinc.unal@arinc9.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 8831bd409a40..02410ac439b7 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -441,6 +441,10 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) break; case PHY_INTERFACE_MODE_TRGMII: trgint = 1; + if (xtal == HWTRAP_XTAL_25MHZ) + ssc_delta = 0x57; + else + ssc_delta = 0x87; if (priv->id == ID_MT7621) { /* PLL frequency: 150MHz: 1.2GBit */ if (xtal == HWTRAP_XTAL_40MHZ) @@ -460,11 +464,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) return -EINVAL; } - if (xtal == HWTRAP_XTAL_25MHZ) - ssc_delta = 0x57; - else - ssc_delta = 0x87; - mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK, P6_INTF_MODE(trgint)); -- cgit From b1de5c78ebe9858ccec9d49af2f76724f1d47e3e Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Mar 2023 14:20:57 +0800 Subject: net: mdio: thunder: Add missing fwnode_handle_put() In device_for_each_child_node(), we should add fwnode_handle_put() when break out of the iteration device_for_each_child_node() as it will automatically increase and decrease the refcounter. Fixes: 379d7ac7ca31 ("phy: mdio-thunder: Add driver for Cavium Thunder SoC MDIO buses.") Signed-off-by: Liang He Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-thunder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c index 3847ee92c109..6067d96b2b7b 100644 --- a/drivers/net/mdio/mdio-thunder.c +++ b/drivers/net/mdio/mdio-thunder.c @@ -106,6 +106,7 @@ static int thunder_mdiobus_pci_probe(struct pci_dev *pdev, if (i >= ARRAY_SIZE(nexus->buses)) break; } + fwnode_handle_put(fwn); return 0; err_release_regions: -- cgit From fc3608aaa5751318837e4bbe0282b3836bca5080 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 22 Mar 2023 01:11:18 +0100 Subject: efi/libstub: Use relocated version of kernel's struct screen_info In some cases, we expose the kernel's struct screen_info to the EFI stub directly, so it gets populated before even entering the kernel. This means the early console is available as soon as the early param parsing happens, which is nice. It also means we need two different ways to pass this information, as this trick only works if the EFI stub is baked into the core kernel image, which is not always the case. Huacai reports that the preparatory refactoring that was needed to implement this alternative method for zboot resulted in a non-functional efifb earlycon for other cases as well, due to the reordering of the kernel image relocation with the population of the screen_info struct, and the latter now takes place after copying the image to its new location, which means we copy the old, uninitialized state. So let's ensure that the same-image version of alloc_screen_info() produces the correct screen_info pointer, by taking the displacement of the loaded image into account. Reported-by: Huacai Chen Tested-by: Huacai Chen Link: https://lore.kernel.org/linux-efi/20230310021749.921041-1-chenhuacai@loongson.cn/ Fixes: 42c8ea3dca094ab8 ("efi: libstub: Factor out EFI stub entrypoint into separate file") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64-stub.c | 4 +++- drivers/firmware/efi/libstub/efi-stub-entry.c | 11 +++++++++++ drivers/firmware/efi/libstub/efi-stub.c | 5 ----- drivers/firmware/efi/libstub/efistub.h | 1 + drivers/firmware/efi/libstub/screen_info.c | 9 +-------- drivers/firmware/efi/libstub/zboot.c | 5 +++++ 6 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b996553cdb4c..770b8ecb7398 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -85,8 +85,10 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } } - if (image->image_base != _text) + if (image->image_base != _text) { efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); + image->image_base = _text; + } if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN)) efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n", diff --git a/drivers/firmware/efi/libstub/efi-stub-entry.c b/drivers/firmware/efi/libstub/efi-stub-entry.c index 5245c4f031c0..cc4dcaea67fa 100644 --- a/drivers/firmware/efi/libstub/efi-stub-entry.c +++ b/drivers/firmware/efi/libstub/efi-stub-entry.c @@ -5,6 +5,15 @@ #include "efistub.h" +static unsigned long screen_info_offset; + +struct screen_info *alloc_screen_info(void) +{ + if (IS_ENABLED(CONFIG_ARM)) + return __alloc_screen_info(); + return (void *)&screen_info + screen_info_offset; +} + /* * EFI entry point for the generic EFI stub used by ARM, arm64, RISC-V and * LoongArch. This is the entrypoint that is described in the PE/COFF header @@ -56,6 +65,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, return status; } + screen_info_offset = image_addr - (unsigned long)image->image_base; + status = efi_stub_common(handle, image, image_addr, cmdline_ptr); efi_free(image_size, image_addr); diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index 2955c1ac6a36..f9c1e8a2bd1d 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -47,11 +47,6 @@ static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; static bool flat_va_mapping = (EFI_RT_VIRTUAL_OFFSET != 0); -struct screen_info * __weak alloc_screen_info(void) -{ - return &screen_info; -} - void __weak free_screen_info(struct screen_info *si) { } diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index bd9c38a93bbc..148013bcb5f8 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1062,6 +1062,7 @@ efi_enable_reset_attack_mitigation(void) { } void efi_retrieve_tpm2_eventlog(void); struct screen_info *alloc_screen_info(void); +struct screen_info *__alloc_screen_info(void); void free_screen_info(struct screen_info *si); void efi_cache_sync_image(unsigned long image_base, diff --git a/drivers/firmware/efi/libstub/screen_info.c b/drivers/firmware/efi/libstub/screen_info.c index 8e76a8b384ba..4be1c4d1f922 100644 --- a/drivers/firmware/efi/libstub/screen_info.c +++ b/drivers/firmware/efi/libstub/screen_info.c @@ -15,18 +15,11 @@ * early, but it only works if the EFI stub is part of the core kernel image * itself. The zboot decompressor can only use the configuration table * approach. - * - * In order to support both methods from the same build of the EFI stub - * library, provide this dummy global definition of struct screen_info. If it - * is required to satisfy a link dependency, it means we need to override the - * __weak alloc and free methods with the ones below, and those will be pulled - * in as well. */ -struct screen_info screen_info; static efi_guid_t screen_info_guid = LINUX_EFI_SCREEN_INFO_TABLE_GUID; -struct screen_info *alloc_screen_info(void) +struct screen_info *__alloc_screen_info(void) { struct screen_info *si; efi_status_t status; diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c index ba234e062a1a..6105e5e2eda4 100644 --- a/drivers/firmware/efi/libstub/zboot.c +++ b/drivers/firmware/efi/libstub/zboot.c @@ -57,6 +57,11 @@ void __weak efi_cache_sync_image(unsigned long image_base, // executable code loaded into memory to be safe for execution. } +struct screen_info *alloc_screen_info(void) +{ + return __alloc_screen_info(); +} + asmlinkage efi_status_t __efiapi efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab) { -- cgit From aaee0ce460b954e08b6e630d7e54b2abb672feb8 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Wed, 15 Mar 2023 15:52:09 +0800 Subject: drm/amdgpu: reposition the gpu reset checking for reuse Move the amdgpu_acpi_should_gpu_reset out of CONFIG_SUSPEND to share it with hibernate case. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 41 ++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 164141bc8b4a..a6b312da021e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1391,10 +1391,12 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); void amdgpu_acpi_detect(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } +static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline void amdgpu_acpi_detect(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, @@ -1405,11 +1407,9 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); -bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } -static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index d4196fcb85a0..60b1857f469e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -971,6 +971,29 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev) return true; } + +/** + * amdgpu_acpi_should_gpu_reset + * + * @adev: amdgpu_device_pointer + * + * returns true if should reset GPU, false if not + */ +bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + return false; + + if (amdgpu_sriov_vf(adev)) + return false; + +#if IS_ENABLED(CONFIG_SUSPEND) + return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; +#else + return true; +#endif +} + /* * amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods * @@ -1042,24 +1065,6 @@ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) (pm_suspend_target_state == PM_SUSPEND_MEM); } -/** - * amdgpu_acpi_should_gpu_reset - * - * @adev: amdgpu_device_pointer - * - * returns true if should reset GPU, false if not - */ -bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) -{ - if (adev->flags & AMD_IS_APU) - return false; - - if (amdgpu_sriov_vf(adev)) - return false; - - return pm_suspend_target_state != PM_SUSPEND_TO_IDLE; -} - /** * amdgpu_acpi_is_s0ix_active * -- cgit From b589626674de94d977e81c99bf7905872b991197 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 9 Mar 2023 16:27:51 +0800 Subject: drm/amdgpu: skip ASIC reset for APUs when go to S4 For GC IP v11.0.4/11, PSP TMR need to be reserved for ASIC mode2 reset. But for S4, when psp suspend, it will destroy the TMR that fails the ASIC reset. [ 96.006101] amdgpu 0000:62:00.0: amdgpu: MODE2 reset [ 100.409717] amdgpu 0000:62:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x00000011 SMN_C2PMSG_82:0x00000002 [ 100.411593] amdgpu 0000:62:00.0: amdgpu: Mode2 reset failed! [ 100.412470] amdgpu 0000:62:00.0: PM: pci_pm_freeze(): amdgpu_pmops_freeze+0x0/0x50 [amdgpu] returns -62 [ 100.414020] amdgpu 0000:62:00.0: PM: dpm_run_callback(): pci_pm_freeze+0x0/0xd0 returns -62 [ 100.415311] amdgpu 0000:62:00.0: PM: pci_pm_freeze+0x0/0xd0 returned -62 after 4623202 usecs [ 100.416608] amdgpu 0000:62:00.0: PM: failed to freeze async: error -62 We can skip the reset on APUs, assuming we can resume them properly. Verified on some GFX11, GFX10 and old GFX9 APUs. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f5ffca24def4..ba5def374368 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2467,7 +2467,10 @@ static int amdgpu_pmops_freeze(struct device *dev) adev->in_s4 = false; if (r) return r; - return amdgpu_asic_reset(adev); + + if (amdgpu_acpi_should_gpu_reset(adev)) + return amdgpu_asic_reset(adev); + return 0; } static int amdgpu_pmops_thaw(struct device *dev) -- cgit From 4eb0b49a0ad3e004a6a65b84efe37bc7e66d560f Mon Sep 17 00:00:00 2001 From: Tong Liu01 Date: Wed, 15 Mar 2023 15:24:22 +0800 Subject: drm/amdgpu: add mes resume when do gfx post soft reset [why] when gfx do soft reset, mes will also do reset, if mes is not resumed when do recover from soft reset, mes is unable to respond in later sequence [how] resume mes when do gfx post soft reset Signed-off-by: Tong Liu01 Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 3bf697a80cf2..08650f93f210 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4655,6 +4655,14 @@ static bool gfx_v11_0_check_soft_reset(void *handle) return false; } +static int gfx_v11_0_post_soft_reset(void *handle) +{ + /** + * GFX soft reset will impact MES, need resume MES when do GFX soft reset + */ + return amdgpu_mes_resume((struct amdgpu_device *)handle); +} + static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; @@ -6166,6 +6174,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .wait_for_idle = gfx_v11_0_wait_for_idle, .soft_reset = gfx_v11_0_soft_reset, .check_soft_reset = gfx_v11_0_check_soft_reset, + .post_soft_reset = gfx_v11_0_post_soft_reset, .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, -- cgit From 033c56474acf567a450f8bafca50e0b610f2b716 Mon Sep 17 00:00:00 2001 From: YuBiao Wang Date: Thu, 16 Mar 2023 11:30:32 +0800 Subject: drm/amdgpu: Force signal hw_fences that are embedded in non-sched jobs [Why] For engines not supporting soft reset, i.e. VCN, there will be a failed ib test before mode 1 reset during asic reset. The fences in this case are never signaled and next time when we try to free the sa_bo, kernel will hang. [How] During pre_asic_reset, driver will clear job fences and afterwards the fences' refcount will be reduced to 1. For drm_sched_jobs it will be released in job_free_cb, and for non-sched jobs like ib_test, it's meant to be released in sa_bo_free but only when the fences are signaled. So we have to force signal the non_sched bad job's fence during pre_asic_reset or the clear is not complete. Signed-off-by: YuBiao Wang Acked-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index faff4a3f96e6..f52d0ba91a77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -678,6 +678,15 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) ptr = &ring->fence_drv.fences[i]; old = rcu_dereference_protected(*ptr, 1); if (old && old->ops == &amdgpu_job_fence_ops) { + struct amdgpu_job *job; + + /* For non-scheduler bad job, i.e. failed ib test, we need to signal + * it right here or we won't be able to track them in fence_drv + * and they will remain unsignaled during sa_bo free. + */ + job = container_of(old, struct amdgpu_job, hw_fence); + if (!job->base.s_fence && !dma_fence_is_signaled(old)) + dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); dma_fence_put(old); } -- cgit From e06bfcc1a1c41bcb8c31470d437e147ce9f0acfd Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Wed, 15 Mar 2023 18:59:59 +0800 Subject: drm/amdgpu/gfx: set cg flags to enter/exit safe mode sriov needs to enter/exit safe mode in update umd p state add the cg flag to let it enter or exit while needed Signed-off-by: Jane Jian Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 08650f93f210..ecf8ceb53311 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1287,6 +1287,11 @@ static int gfx_v11_0_sw_init(void *handle) break; } + /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) && + amdgpu_sriov_is_pp_one_vf(adev)) + adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; + /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, -- cgit From 6d457ca162da98a6a1a381320e936d7448177de9 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Mon, 6 Mar 2023 11:39:51 +0800 Subject: drm/amd/display: remove outdated 8bpc comments [Why] The commit c76e483cd916 ("drm/amd/display: Don't restrict bpc to 8 bpc") removes the historical 8bpc dependency and sets max_bpc to 16. [How] The comment that states "8bpc for non-edp" needs to be removed as well. Reviewed-by: Harry Wentland Acked-by: Qingqing Zhuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 32abbafd43fa..a01fd41643fc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7244,7 +7244,6 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, if (!aconnector->mst_root) drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16); - /* This defaults to the max in the range, but we want 8bpc for non-edp. */ aconnector->base.state->max_bpc = 16; aconnector->base.state->max_requested_bpc = aconnector->base.state->max_bpc; -- cgit From 2b072442f4962231a8516485012bb2d2551ef2fe Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 15 Mar 2023 20:07:23 +0800 Subject: drm/amdgpu/nv: Apply ASPM quirk on Intel ADL + AMD Navi S2idle resume freeze can be observed on Intel ADL + AMD WX5500. This is caused by commit 0064b0ce85bb ("drm/amd/pm: enable ASPM by default"). The root cause is still not clear for now. So extend and apply the ASPM quirk from commit e02fe3bc7aba ("drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems"), to workaround the issue on Navi cards too. Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2458 Reviewed-by: Alex Deucher Signed-off-by: Kai-Heng Feng Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/nv.c | 2 +- drivers/gpu/drm/amd/amdgpu/vi.c | 17 +---------------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a6b312da021e..39018f784f9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1272,6 +1272,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); +bool amdgpu_device_aspm_support_quirk(void); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index da5b0258a237..3d98fc2ad36b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,6 +80,10 @@ #include +#if IS_ENABLED(CONFIG_X86) +#include +#endif + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -1356,6 +1360,17 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) return pcie_aspm_enabled(adev->pdev); } +bool amdgpu_device_aspm_support_quirk(void) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); +#else + return true; +#endif +} + /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 22e25ca285f8..ebe0e2d7dbd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -578,7 +578,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev) static void nv_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_device_should_use_aspm(adev)) + if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) return; if (!(adev->flags & AMD_IS_APU) && diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 12ef782eb478..ceab8783575c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -81,10 +81,6 @@ #include "mxgpu_vi.h" #include "amdgpu_dm.h" -#if IS_ENABLED(CONFIG_X86) -#include -#endif - #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 0x00000001L #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 0x00000002L @@ -1138,24 +1134,13 @@ static void vi_enable_aspm(struct amdgpu_device *adev) WREG32_PCIE(ixPCIE_LC_CNTL, data); } -static bool aspm_support_quirk_check(void) -{ -#if IS_ENABLED(CONFIG_X86) - struct cpuinfo_x86 *c = &cpu_data(0); - - return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); -#else - return true; -#endif -} - static void vi_program_aspm(struct amdgpu_device *adev) { u32 data, data1, orig; bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check()) + if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) return; if (adev->flags & AMD_IS_APU || -- cgit From 4c94e57c258cb7800aa5f3a9d9597d91291407a9 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Thu, 9 Mar 2023 16:14:08 -0500 Subject: drm/amd/display: fix wrong index used in dccg32_set_dpstreamclk [Why & How] When merging commit 9af611f29034 ("drm/amd/display: Fix DCN32 DPSTREAMCLK_CNTL programming"), index change was not picked up. Cc: stable@vger.kernel.org Cc: Mario Limonciello Fixes: 9af611f29034 ("drm/amd/display: Fix DCN32 DPSTREAMCLK_CNTL programming") Reviewed-by: Qingqing Zhuo Acked-by: Qingqing Zhuo Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index e4472c6be6c3..3fb4bcc34353 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -271,8 +271,7 @@ static void dccg32_set_dpstreamclk( dccg32_set_dtbclk_p_src(dccg, src, otg_inst); /* enabled to select one of the DTBCLKs for pipe */ - switch (otg_inst) - { + switch (dp_hpo_inst) { case 0: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, -- cgit From f9537b1fa7fb51c2162bc15ce469cbbf1ca0fbfe Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Thu, 9 Mar 2023 15:58:54 -0500 Subject: drm/amd/display: Set dcn32 caps.seamless_odm [Why & How] seamless_odm set was not picked up while merging commit 2d017189e2b3 ("drm/amd/display: Blank eDP on enable drv if odm enabled") Fixes: 2d017189e2b3 ("drm/amd/display: Blank eDP on enable drv if odm enabled") Reviewed-by: Qingqing Zhuo Acked-by: Qingqing Zhuo Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index d024007f0f65..4b7abb4af623 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2186,6 +2186,7 @@ static bool dcn32_resource_construct( dc->caps.edp_dsc_support = true; dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; + dc->caps.seamless_odm = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; -- cgit From 0b1d9debe30304f35c1211e6dcdca1935ce67240 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 8 Mar 2023 00:21:34 +0100 Subject: efi/libstub: randomalloc: Return EFI_OUT_OF_RESOURCES on failure The logic in efi_random_alloc() will iterate over the memory map twice, once to count the number of candidate slots, and another time to locate the chosen slot after randomization. If there is insufficient memory to do the allocation, the second loop will run to completion without actually having located a slot, but we currently return EFI_SUCCESS in this case, as we fail to initialize status to the appropriate error value of EFI_OUT_OF_RESOURCES. Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/randomalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 1692d19ae80f..32c7a54923b4 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -101,6 +101,7 @@ efi_status_t efi_random_alloc(unsigned long size, * to calculate the randomly chosen address, and allocate it directly * using EFI_ALLOCATE_ADDRESS. */ + status = EFI_OUT_OF_RESOURCES; for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) { efi_memory_desc_t *md = (void *)map->map + map_offset; efi_physical_addr_t target; -- cgit From 072a28c8907c841f7d4b56c78bce46d3ee211e73 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 8 Mar 2023 12:11:31 +0000 Subject: cifs: do not poll server interfaces too regularly We have the server interface list hanging off the tcon structure today for reasons unknown. So each tcon which is connected to a file server can query them separately, which is really unnecessary. To avoid this, in the query function, we will check the time of last update of the interface list, and avoid querying the server if it is within a certain range. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 6dfb865ee9d7..96ca09d885a2 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -530,6 +530,14 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; spin_lock(&ses->iface_lock); + /* do not query too frequently, this time with lock held */ + if (ses->iface_last_update && + time_before(jiffies, ses->iface_last_update + + (SMB_INTERFACE_POLL_INTERVAL * HZ))) { + spin_unlock(&ses->iface_lock); + return 0; + } + /* * Go through iface_list and do kref_put to remove * any unused ifaces. ifaces in use will be removed @@ -696,6 +704,12 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ struct network_interface_info_ioctl_rsp *out_buf = NULL; struct cifs_ses *ses = tcon->ses; + /* do not query too frequently */ + if (ses->iface_last_update && + time_before(jiffies, ses->iface_last_update + + (SMB_INTERFACE_POLL_INTERVAL * HZ))) + return 0; + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, NULL /* no data input */, 0 /* no data input */, -- cgit From 896cd316b841053f6df95ab77b5f1322c16a8e18 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 9 Mar 2023 13:23:29 +0000 Subject: cifs: empty interface list when server doesn't support query interfaces When querying server interfaces returns -EOPNOTSUPP, clear the list of interfaces. Assumption is that multichannel would be disabled too. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96ca09d885a2..f7e18ab7ee9c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -717,7 +717,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon, bool in_ if (rc == -EOPNOTSUPP) { cifs_dbg(FYI, "server does not support query network interfaces\n"); - goto out; + ret_data_len = 0; } else if (rc != 0) { cifs_tcon_dbg(VFS, "error %d on ioctl to get interface list\n", rc); goto out; -- cgit From d12bc6d26f92c51b28e8f4a146ffcc630b688198 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 13 Mar 2023 11:09:12 +0000 Subject: cifs: dump pending mids for all channels in DebugData Currently, we only dump the pending mid information only on the primary channel in /proc/fs/cifs/DebugData. If multichannel is active, we do not print the pending MID list on secondary channels. This change will dump the pending mids for all the channels based on server->conn_id. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 19a70a69c760..38369c6cd062 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -216,6 +216,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct mid_q_entry *mid_entry; struct TCP_Server_Info *server; + struct TCP_Server_Info *chan_server; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_server_iface *iface; @@ -474,23 +475,35 @@ skip_rdma: seq_puts(m, "\t\t[CONNECTED]\n"); } spin_unlock(&ses->iface_lock); + + seq_puts(m, "\n\n\tMIDs: "); + spin_lock(&ses->chan_lock); + for (j = 0; j < ses->chan_count; j++) { + chan_server = ses->chans[j].server; + if (!chan_server) + continue; + + if (list_empty(&chan_server->pending_mid_q)) + continue; + + seq_printf(m, "\n\tServer ConnectionId: 0x%llx", + chan_server->conn_id); + spin_lock(&chan_server->mid_lock); + list_for_each_entry(mid_entry, &chan_server->pending_mid_q, qhead) { + seq_printf(m, "\n\t\tState: %d com: %d pid: %d cbdata: %p mid %llu", + mid_entry->mid_state, + le16_to_cpu(mid_entry->command), + mid_entry->pid, + mid_entry->callback_data, + mid_entry->mid); + } + spin_unlock(&chan_server->mid_lock); + } + spin_unlock(&ses->chan_lock); + seq_puts(m, "\n--\n"); } if (i == 0) seq_printf(m, "\n\t\t[NONE]"); - - seq_puts(m, "\n\n\tMIDs: "); - spin_lock(&server->mid_lock); - list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { - seq_printf(m, "\n\tState: %d com: %d pid:" - " %d cbdata: %p mid %llu\n", - mid_entry->mid_state, - le16_to_cpu(mid_entry->command), - mid_entry->pid, - mid_entry->callback_data, - mid_entry->mid); - } - spin_unlock(&server->mid_lock); - seq_printf(m, "\n--\n"); } if (c == 0) seq_printf(m, "\n\t[NONE]"); -- cgit From 175b54abc443b6965e9379b71ec05f7c73c192e9 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 13 Mar 2023 12:17:34 +0000 Subject: cifs: print session id while listing open files In the output of /proc/fs/cifs/open_files, we only print the tree id for the tcon of each open file. It becomes difficult to know which tcon these files belong to with just the tree id. This change dumps ses id in addition to all other data today. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 38369c6cd062..e9c8c088d948 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -176,7 +176,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) seq_puts(m, "# Version:1\n"); seq_puts(m, "# Format:\n"); - seq_puts(m, "# "); + seq_puts(m, "# "); #ifdef CONFIG_CIFS_DEBUG2 seq_printf(m, " \n"); #else @@ -189,8 +189,9 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) spin_lock(&tcon->open_file_lock); list_for_each_entry(cfile, &tcon->openFileList, tlist) { seq_printf(m, - "0x%x 0x%llx 0x%x %d %d %d %pd", + "0x%x 0x%llx 0x%llx 0x%x %d %d %d %pd", tcon->tid, + ses->Suid, cfile->fid.persistent_fid, cfile->f_flags, cfile->count, -- cgit From 3670de80678961eda7fa2220883fc77c16868951 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 17 Mar 2023 14:15:15 +0800 Subject: usb: chipdea: core: fix return -EINVAL if request role is the same with current role It should not return -EINVAL if the request role is the same with current role, return non-error and without do anything instead. Fixes: a932a8041ff9 ("usb: chipidea: core: add sysfs group") cc: Acked-by: Peter Chen Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20230317061516.2451728-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 27c601296130..b6f2a41de20e 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -984,9 +984,12 @@ static ssize_t role_store(struct device *dev, strlen(ci->roles[role]->name))) break; - if (role == CI_ROLE_END || role == ci->role) + if (role == CI_ROLE_END) return -EINVAL; + if (role == ci->role) + return n; + pm_runtime_get_sync(dev); disable_irq(ci->irq); ci_role_stop(ci); -- cgit From 451b15ed138ec15bffbebb58a00ebdd884c3e659 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 17 Mar 2023 14:15:16 +0800 Subject: usb: chipidea: core: fix possible concurrent when switch role The user may call role_store() when driver is handling ci_handle_id_switch() which is triggerred by otg event or power lost event. Unfortunately, the controller may go into chaos in this case. Fix this by protecting it with mutex lock. Fixes: a932a8041ff9 ("usb: chipidea: core: add sysfs group") cc: Acked-by: Peter Chen Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20230317061516.2451728-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci.h | 2 ++ drivers/usb/chipidea/core.c | 8 +++++++- drivers/usb/chipidea/otg.c | 5 ++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index 005c67cb3afb..f210b7489fd5 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -208,6 +208,7 @@ struct hw_bank { * @in_lpm: if the core in low power mode * @wakeup_int: if wakeup interrupt occur * @rev: The revision number for controller + * @mutex: protect code from concorrent running when doing role switch */ struct ci_hdrc { struct device *dev; @@ -260,6 +261,7 @@ struct ci_hdrc { bool in_lpm; bool wakeup_int; enum ci_revision rev; + struct mutex mutex; }; static inline struct ci_role_driver *ci_role(struct ci_hdrc *ci) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index b6f2a41de20e..281fc51720ce 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -987,8 +987,12 @@ static ssize_t role_store(struct device *dev, if (role == CI_ROLE_END) return -EINVAL; - if (role == ci->role) + mutex_lock(&ci->mutex); + + if (role == ci->role) { + mutex_unlock(&ci->mutex); return n; + } pm_runtime_get_sync(dev); disable_irq(ci->irq); @@ -998,6 +1002,7 @@ static ssize_t role_store(struct device *dev, ci_handle_vbus_change(ci); enable_irq(ci->irq); pm_runtime_put_sync(dev); + mutex_unlock(&ci->mutex); return (ret == 0) ? n : ret; } @@ -1033,6 +1038,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&ci->lock); + mutex_init(&ci->mutex); ci->dev = dev; ci->platdata = dev_get_platdata(dev); ci->imx28_write_fix = !!(ci->platdata->flags & diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 622c3b68aa1e..f5490f2a5b6b 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -167,8 +167,10 @@ static int hw_wait_vbus_lower_bsv(struct ci_hdrc *ci) void ci_handle_id_switch(struct ci_hdrc *ci) { - enum ci_role role = ci_otg_role(ci); + enum ci_role role; + mutex_lock(&ci->mutex); + role = ci_otg_role(ci); if (role != ci->role) { dev_dbg(ci->dev, "switching from %s to %s\n", ci_role(ci)->name, ci->roles[role]->name); @@ -198,6 +200,7 @@ void ci_handle_id_switch(struct ci_hdrc *ci) if (role == CI_ROLE_GADGET) ci_handle_vbus_change(ci); } + mutex_unlock(&ci->mutex); } /** * ci_otg_work - perform otg (vbus/id) event handle -- cgit From f747313249b74f323ddf841a9c8db14d989f296a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 16 Mar 2023 09:41:27 +0100 Subject: usb: dwc2: fix a devres leak in hw_enable upon suspend resume Each time the platform goes to low power, PM suspend / resume routines call: __dwc2_lowlevel_hw_enable -> devm_add_action_or_reset(). This adds a new devres each time. This may also happen at runtime, as dwc2_lowlevel_hw_enable() can be called from udc_start(). This can be seen with tracing: - echo 1 > /sys/kernel/debug/tracing/events/dev/devres_log/enable - go to low power - cat /sys/kernel/debug/tracing/trace A new "ADD" entry is found upon each low power cycle: ... devres_log: 49000000.usb-otg ADD 82a13bba devm_action_release (8 bytes) ... devres_log: 49000000.usb-otg ADD 49889daf devm_action_release (8 bytes) ... A second issue is addressed here: - regulator_bulk_enable() is called upon each PM cycle (suspend/resume). - regulator_bulk_disable() never gets called. So the reference count for these regulators constantly increase, by one upon each low power cycle, due to missing regulator_bulk_disable() call in __dwc2_lowlevel_hw_disable(). The original fix that introduced the devm_add_action_or_reset() call, fixed an issue during probe, that happens due to other errors in dwc2_driver_probe() -> dwc2_core_reset(). Then the probe fails without disabling regulators, when dr_mode == USB_DR_MODE_PERIPHERAL. Rather fix the error path: disable all the low level hardware in the error path, by using the "hsotg->ll_hw_enabled" flag. Checking dr_mode has been introduced to avoid a dual call to dwc2_lowlevel_hw_disable(). "ll_hw_enabled" should achieve the same (and is used currently in the remove() routine). Fixes: 54c196060510 ("usb: dwc2: Always disable regulators on driver teardown") Fixes: 33a06f1300a7 ("usb: dwc2: Fix error path in gadget registration") Cc: stable Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20230316084127.126084-1-fabrice.gasnier@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 23ef75996823..e93506739664 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -91,13 +91,6 @@ static int dwc2_get_dr_mode(struct dwc2_hsotg *hsotg) return 0; } -static void __dwc2_disable_regulators(void *data) -{ - struct dwc2_hsotg *hsotg = data; - - regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); -} - static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg) { struct platform_device *pdev = to_platform_device(hsotg->dev); @@ -108,11 +101,6 @@ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg) if (ret) return ret; - ret = devm_add_action_or_reset(&pdev->dev, - __dwc2_disable_regulators, hsotg); - if (ret) - return ret; - if (hsotg->clk) { ret = clk_prepare_enable(hsotg->clk); if (ret) @@ -168,7 +156,7 @@ static int __dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg) if (hsotg->clk) clk_disable_unprepare(hsotg->clk); - return 0; + return regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies), hsotg->supplies); } /** @@ -608,7 +596,7 @@ error_init: if (hsotg->params.activate_stm_id_vb_detection) regulator_disable(hsotg->usb33d); error: - if (hsotg->dr_mode != USB_DR_MODE_PERIPHERAL) + if (hsotg->ll_hw_enabled) dwc2_lowlevel_hw_disable(hsotg); return retval; } -- cgit From 5021383242ada277a38bd052a4c12ed4707faccb Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 15 Mar 2023 15:44:33 +0100 Subject: usb: dwc2: fix a race, don't power off/on phy for dual-role mode When in dual role mode (dr_mode == USB_DR_MODE_OTG), platform probe successively basically calls: - dwc2_gadget_init() - dwc2_hcd_init() - dwc2_lowlevel_hw_disable() since recent change [1] - usb_add_gadget_udc() The PHYs (and so the clocks it may provide) shouldn't be disabled for all SoCs, in OTG mode, as the HCD part has been initialized. On STM32 this creates some weird race condition upon boot, when: - initially attached as a device, to a HOST - and there is a gadget script invoked to setup the device part. Below issue becomes systematic, as long as the gadget script isn't started by userland: the hardware PHYs (and so the clocks provided by the PHYs) remains disabled. It ends up in having an endless interrupt storm, before the watchdog resets the platform. [ 16.924163] dwc2 49000000.usb-otg: EPs: 9, dedicated fifos, 952 entries in SPRAM [ 16.962704] dwc2 49000000.usb-otg: DWC OTG Controller [ 16.966488] dwc2 49000000.usb-otg: new USB bus registered, assigned bus number 2 [ 16.974051] dwc2 49000000.usb-otg: irq 77, io mem 0x49000000 [ 17.032170] hub 2-0:1.0: USB hub found [ 17.042299] hub 2-0:1.0: 1 port detected [ 17.175408] dwc2 49000000.usb-otg: Mode Mismatch Interrupt: currently in Host mode [ 17.181741] dwc2 49000000.usb-otg: Mode Mismatch Interrupt: currently in Host mode [ 17.189303] dwc2 49000000.usb-otg: Mode Mismatch Interrupt: currently in Host mode ... The host part is also not functional, until the gadget part is configured. The HW may only be disabled for peripheral mode (original init), e.g. dr_mode == USB_DR_MODE_PERIPHERAL, until the gadget driver initializes. But when in USB_DR_MODE_OTG, the HW should remain enabled, as the HCD part is able to run, while the gadget part isn't necessarily configured. I don't fully get the of purpose the original change, that claims disabling the hardware is missing. It creates conditions on SOCs using the PHY initialization to be completely non working in OTG mode. Original change [1] should be reworked to be platform specific. [1] https://lore.kernel.org/r/20221206-dwc2-gadget-dual-role-v1-2-36515e1092cd@theobroma-systems.com Fixes: ade23d7b7ec5 ("usb: dwc2: power on/off phy for peripheral mode in dual-role mode") Cc: stable Signed-off-by: Fabrice Gasnier Reviewed-by: Quentin Schulz Tested-by: Quentin Schulz Link: https://lore.kernel.org/r/20230315144433.3095859-1-fabrice.gasnier@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/gadget.c | 6 ++---- drivers/usb/dwc2/platform.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 62fa6378d2d7..8b15742d9e8a 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4549,8 +4549,7 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget, hsotg->gadget.dev.of_node = hsotg->dev->of_node; hsotg->gadget.speed = USB_SPEED_UNKNOWN; - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || - (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) { + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) { ret = dwc2_lowlevel_hw_enable(hsotg); if (ret) goto err; @@ -4612,8 +4611,7 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) if (!IS_ERR_OR_NULL(hsotg->uphy)) otg_set_peripheral(hsotg->uphy->otg, NULL); - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || - (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) dwc2_lowlevel_hw_disable(hsotg); return 0; diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index e93506739664..d1589ba7d322 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -564,8 +564,7 @@ static int dwc2_driver_probe(struct platform_device *dev) dwc2_debugfs_init(hsotg); /* Gadget code manages lowlevel hw on its own */ - if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL || - (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) + if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) dwc2_lowlevel_hw_disable(hsotg); #if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || \ -- cgit From 260595b439776c473cc248f0de63fe78d964d849 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 23 Mar 2023 12:26:02 -0400 Subject: Reinstate "GFS2: free disk inode which is deleted by remote node -V2" It turns out that reverting commit 970343cd4904 ("GFS2: free disk inode which is deleted by remote node -V2") causes a regression related to evicting inodes that were unlinked on a different cluster node. We could also have simply added a call to d_mark_dontcache() to function gfs2_try_evict(), but the original pre-revert code is better tested and proven. This reverts commit 445cb1277e10d7e19b631ef8a64aa3f055df377d. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/dentry.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 6fe9ca253b70..2e215e8c3c88 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -83,8 +83,26 @@ static int gfs2_dhash(const struct dentry *dentry, struct qstr *str) return 0; } +static int gfs2_dentry_delete(const struct dentry *dentry) +{ + struct gfs2_inode *ginode; + + if (d_really_is_negative(dentry)) + return 0; + + ginode = GFS2_I(d_inode(dentry)); + if (!gfs2_holder_initialized(&ginode->i_iopen_gh)) + return 0; + + if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags)) + return 1; + + return 0; +} + const struct dentry_operations gfs2_dops = { .d_revalidate = gfs2_drevalidate, .d_hash = gfs2_dhash, + .d_delete = gfs2_dentry_delete, }; -- cgit From e89c2e815e76471cb507bd95728bf26da7976430 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 13 Mar 2023 16:00:23 -0700 Subject: riscv: Handle zicsr/zifencei issues between clang and binutils There are two related issues that appear in certain combinations with clang and GNU binutils. The first occurs when a version of clang that supports zicsr or zifencei via '-march=' [1] (i.e, >= 17.x) is used in combination with a version of GNU binutils that do not recognize zicsr and zifencei in the '-march=' value (i.e., < 2.36): riscv64-linux-gnu-ld: -march=rv64i2p0_m2p0_a2p0_c2p0_zicsr2p0_zifencei2p0: Invalid or unknown z ISA extension: 'zifencei' riscv64-linux-gnu-ld: failed to merge target specific data of file fs/efivarfs/file.o riscv64-linux-gnu-ld: -march=rv64i2p0_m2p0_a2p0_c2p0_zicsr2p0_zifencei2p0: Invalid or unknown z ISA extension: 'zifencei' riscv64-linux-gnu-ld: failed to merge target specific data of file fs/efivarfs/super.o The second occurs when a version of clang that does not support zicsr or zifencei via '-march=' (i.e., <= 16.x) is used in combination with a version of GNU as that defaults to a newer ISA base spec, which requires specifying zicsr and zifencei in the '-march=' value explicitly (i.e, >= 2.38): ../arch/riscv/kernel/kexec_relocate.S: Assembler messages: ../arch/riscv/kernel/kexec_relocate.S:147: Error: unrecognized opcode `fence.i', extension `zifencei' required clang-12: error: assembler command failed with exit code 1 (use -v to see invocation) This is the same issue addressed by commit 6df2a016c0c8 ("riscv: fix build with binutils 2.38") (see [2] for additional information) but older versions of clang miss out on it because the cc-option check fails: clang-12: error: invalid arch name 'rv64imac_zicsr_zifencei', unsupported standard user-level extension 'zicsr' clang-12: error: invalid arch name 'rv64imac_zicsr_zifencei', unsupported standard user-level extension 'zicsr' To resolve the first issue, only attempt to add zicsr and zifencei to the march string when using the GNU assembler 2.38 or newer, which is when the default ISA spec was updated, requiring these extensions to be specified explicitly. LLVM implements an older version of the base specification for all currently released versions, so these instructions are available as part of the 'i' extension. If LLVM's implementation is updated in the future, a CONFIG_AS_IS_LLVM condition can be added to CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. To resolve the second issue, use version 2.2 of the base ISA spec when using an older version of clang that does not support zicsr or zifencei via '-march=', as that is the spec version most compatible with the one clang/LLVM implements and avoids the need to specify zicsr and zifencei explicitly due to still being a part of 'i'. [1]: https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 [2]: https://lore.kernel.org/ZAxT7T9Xy1Fo3d5W@aurel32.net/ Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1808 Co-developed-by: Conor Dooley Signed-off-by: Conor Dooley Signed-off-by: Nathan Chancellor Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230313-riscv-zicsr-zifencei-fiasco-v1-1-dd1b7840a551@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 22 ++++++++++++++++++++++ arch/riscv/Makefile | 10 ++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c5e42cc37604..5b182d1c196c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -464,6 +464,28 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zihintpause) depends on LLD_VERSION >= 150000 || LD_VERSION >= 23600 +config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + def_bool y + # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc + depends on AS_IS_GNU && AS_VERSION >= 23800 + help + Newer binutils versions default to ISA spec version 20191213 which + moves some instructions from the I extension to the Zicsr and Zifencei + extensions. + +config TOOLCHAIN_NEEDS_OLD_ISA_SPEC + def_bool y + depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 + depends on CC_IS_CLANG && CLANG_VERSION < 170000 + help + Certain versions of clang do not support zicsr and zifencei via -march + but newer versions of binutils require it for the reasons noted in the + help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This + option causes an older ISA spec compatible with these older versions + of clang to be passed to GAS, which has the same result as passing zicsr + and zifencei to -march. + config FPU bool "FPU support" default y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 4de83b9b1772..b05e833a022d 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -57,10 +57,12 @@ riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c -# Newer binutils versions default to ISA spec version 20191213 which moves some -# instructions from the I extension to the Zicsr and Zifencei extensions. -toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) -riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei +ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC +KBUILD_CFLAGS += -Wa,-misa-spec=2.2 +KBUILD_AFLAGS += -Wa,-misa-spec=2.2 +else +riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei +endif # Check if the toolchain supports Zihintpause extension riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause -- cgit From 902160cdb2bf4d23cd75f43ed0597ddf0134bb89 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Wed, 8 Mar 2023 13:28:37 +0530 Subject: Bluetooth: btinel: Check ACPI handle for NULL before accessing Older platforms and Virtual platforms which doesn't have support for bluetooth device in ACPI firmware will not have valid ACPI handle. Check for validity of handle before accessing. dmesg log from simics environment (virtual platform): BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: acpi_ns_walk_namespace+0x5c/0x278 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI Modules linked in: bnep intel_powerclamp coretemp kvm_intel kvm irqbypass intel_cstate input_leds joydev serio_raw mac_hid btusb(OE) btintel(OE) bluetooth(OE) lpc_ich compat(OE) ecdh_generic i7core_edac i5500_temp shpchp binfmt_misc sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid e1000e psmouse ahci pata_acpi libahci ptp pps_core floppy CPU: 0 PID: 35 Comm: kworker/u3:0 Tainted: G OE 4.15.0-140-generic #144-Ubuntu Hardware name: Simics Simics, BIOS Simics 01/01/2011 Workqueue: hci0 hci_power_on [bluetooth] RIP: 0010:acpi_ns_walk_namespace+0x5c/0x278 RSP: 0000:ffffaa9c0049bba8 EFLAGS: 00010246 RAX: 0000000000000001 RBX: 0000000000001001 RCX: 0000000000000010 RDX: ffffffff92ea7e27 RSI: ffffffff92ea7e10 RDI: 00000000000000c8 RBP: ffffaa9c0049bbf8 R08: 0000000000000000 R09: ffffffffc05b39d0 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000001 R13: 0000000000000000 R14: ffffffffc05b39d0 R15: ffffaa9c0049bc70 FS: 0000000000000000(0000) GS:ffff8be73fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000075f0e000 CR4: 00000000000006f0 Fixes: 294d749b5df5 ("Bluetooth: btintel: Iterate only bluetooth device ACPI entries") Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index e8d4b59e89c5..af774688f1c0 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2326,6 +2326,7 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver struct btintel_ppag ppag; struct sk_buff *skb; struct btintel_loc_aware_reg ppag_cmd; + acpi_handle handle; /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ switch (ver->cnvr_top & 0xFFF) { @@ -2335,12 +2336,18 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver return; } + handle = ACPI_HANDLE(GET_HCIDEV_DEV(hdev)); + if (!handle) { + bt_dev_info(hdev, "No support for BT device in ACPI firmware"); + return; + } + memset(&ppag, 0, sizeof(ppag)); ppag.hdev = hdev; ppag.status = AE_NOT_FOUND; - acpi_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_HANDLE(GET_HCIDEV_DEV(hdev)), - 1, NULL, btintel_ppag_callback, &ppag, NULL); + acpi_walk_namespace(ACPI_TYPE_PACKAGE, handle, 1, NULL, + btintel_ppag_callback, &ppag, NULL); if (ACPI_FAILURE(ppag.status)) { if (ppag.status == AE_NOT_FOUND) { -- cgit From 5d44ab9e204200a78ad55cdf185aa2bb109b5950 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 8 Mar 2023 14:31:55 +0100 Subject: Bluetooth: btqcomsmd: Fix command timeout after setting BD address On most devices using the btqcomsmd driver (e.g. the DragonBoard 410c and other devices based on the Qualcomm MSM8916/MSM8909/... SoCs) the Bluetooth firmware seems to become unresponsive for a while after setting the BD address. On recent kernel versions (at least 5.17+) this often causes timeouts for subsequent commands, e.g. the HCI reset sent by the Bluetooth core during initialization: Bluetooth: hci0: Opcode 0x c03 failed: -110 Unfortunately this behavior does not seem to be documented anywhere. Experimentation suggests that the minimum necessary delay to avoid the problem is ~150us. However, to be sure add a sleep for > 1ms in case it is a bit longer on other firmware versions. Older kernel versions are likely also affected, although perhaps with slightly different errors or less probability. Side effects can easily hide the issue in most cases, e.g. unrelated incoming interrupts that cause the necessary delay. Fixes: 1511cc750c3d ("Bluetooth: Introduce Qualcomm WCNSS SMD based HCI driver") Signed-off-by: Stephan Gerhold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqcomsmd.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index 2acb719e596f..11c7e04bf394 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -122,6 +122,21 @@ static int btqcomsmd_setup(struct hci_dev *hdev) return 0; } +static int btqcomsmd_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr) +{ + int ret; + + ret = qca_set_bdaddr_rome(hdev, bdaddr); + if (ret) + return ret; + + /* The firmware stops responding for a while after setting the bdaddr, + * causing timeouts for subsequent commands. Sleep a bit to avoid this. + */ + usleep_range(1000, 10000); + return 0; +} + static int btqcomsmd_probe(struct platform_device *pdev) { struct btqcomsmd *btq; @@ -162,7 +177,7 @@ static int btqcomsmd_probe(struct platform_device *pdev) hdev->close = btqcomsmd_close; hdev->send = btqcomsmd_send; hdev->setup = btqcomsmd_setup; - hdev->set_bdaddr = qca_set_bdaddr_rome; + hdev->set_bdaddr = btqcomsmd_set_bdaddr; ret = hci_register_dev(hdev); if (ret < 0) -- cgit From 9aa9d9473f1550d1936c31259720b3f1f4690576 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 8 Mar 2023 14:20:34 -0800 Subject: Bluetooth: L2CAP: Fix responding with wrong PDU type L2CAP_ECRED_CONN_REQ shall be responded with L2CAP_ECRED_CONN_RSP not L2CAP_LE_CONN_RSP: L2CAP LE EATT Server - Reject - run Listening for connections New client connection with handle 0x002a Sending L2CAP Request from client Client received response code 0x15 Unexpected L2CAP response code (expected 0x18) L2CAP LE EATT Server - Reject - test failed > ACL Data RX: Handle 42 flags 0x02 dlen 26 LE L2CAP: Enhanced Credit Connection Request (0x17) ident 1 len 18 PSM: 39 (0x0027) MTU: 64 MPS: 64 Credits: 5 Source CID: 65 Source CID: 66 Source CID: 67 Source CID: 68 Source CID: 69 < ACL Data TX: Handle 42 flags 0x00 dlen 16 LE L2CAP: LE Connection Response (0x15) ident 1 len 8 invalid size 00 00 00 00 00 00 06 00 L2CAP LE EATT Server - Reject - run Listening for connections New client connection with handle 0x002a Sending L2CAP Request from client Client received response code 0x18 L2CAP LE EATT Server - Reject - test passed Fixes: 15f02b910562 ("Bluetooth: L2CAP: Add initial code for Enhanced Credit Based Mode") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 117 ++++++++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 38 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index adfc3ea06d08..49926f59cc12 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -708,6 +708,17 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) } EXPORT_SYMBOL_GPL(l2cap_chan_del); +static void __l2cap_chan_list_id(struct l2cap_conn *conn, u16 id, + l2cap_chan_func_t func, void *data) +{ + struct l2cap_chan *chan, *l; + + list_for_each_entry_safe(chan, l, &conn->chan_l, list) { + if (chan->ident == id) + func(chan, data); + } +} + static void __l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, void *data) { @@ -775,23 +786,9 @@ static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) static void l2cap_chan_ecred_connect_reject(struct l2cap_chan *chan) { - struct l2cap_conn *conn = chan->conn; - struct l2cap_ecred_conn_rsp rsp; - u16 result; - - if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) - result = L2CAP_CR_LE_AUTHORIZATION; - else - result = L2CAP_CR_LE_BAD_PSM; - l2cap_state_change(chan, BT_DISCONN); - memset(&rsp, 0, sizeof(rsp)); - - rsp.result = cpu_to_le16(result); - - l2cap_send_cmd(conn, chan->ident, L2CAP_LE_CONN_RSP, sizeof(rsp), - &rsp); + __l2cap_ecred_conn_rsp_defer(chan); } static void l2cap_chan_connect_reject(struct l2cap_chan *chan) @@ -846,7 +843,7 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; case L2CAP_MODE_EXT_FLOWCTL: l2cap_chan_ecred_connect_reject(chan); - break; + return; } } } @@ -3938,43 +3935,86 @@ void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan) &rsp); } -void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +static void l2cap_ecred_list_defer(struct l2cap_chan *chan, void *data) { + int *result = data; + + if (*result || test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + switch (chan->state) { + case BT_CONNECT2: + /* If channel still pending accept add to result */ + (*result)++; + return; + case BT_CONNECTED: + return; + default: + /* If not connected or pending accept it has been refused */ + *result = -ECONNREFUSED; + return; + } +} + +struct l2cap_ecred_rsp_data { struct { struct l2cap_ecred_conn_rsp rsp; - __le16 dcid[5]; + __le16 scid[L2CAP_ECRED_MAX_CID]; } __packed pdu; + int count; +}; + +static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) +{ + struct l2cap_ecred_rsp_data *rsp = data; + + if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + return; + + /* Reset ident so only one response is sent */ + chan->ident = 0; + + /* Include all channels pending with the same ident */ + if (!rsp->pdu.rsp.result) + rsp->pdu.rsp.dcid[rsp->count++] = cpu_to_le16(chan->scid); + else + l2cap_chan_del(chan, ECONNRESET); +} + +void __l2cap_ecred_conn_rsp_defer(struct l2cap_chan *chan) +{ struct l2cap_conn *conn = chan->conn; - u16 ident = chan->ident; - int i = 0; + struct l2cap_ecred_rsp_data data; + u16 id = chan->ident; + int result = 0; - if (!ident) + if (!id) return; - BT_DBG("chan %p ident %d", chan, ident); + BT_DBG("chan %p id %d", chan, id); - pdu.rsp.mtu = cpu_to_le16(chan->imtu); - pdu.rsp.mps = cpu_to_le16(chan->mps); - pdu.rsp.credits = cpu_to_le16(chan->rx_credits); - pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); + memset(&data, 0, sizeof(data)); - mutex_lock(&conn->chan_lock); + data.pdu.rsp.mtu = cpu_to_le16(chan->imtu); + data.pdu.rsp.mps = cpu_to_le16(chan->mps); + data.pdu.rsp.credits = cpu_to_le16(chan->rx_credits); + data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_SUCCESS); - list_for_each_entry(chan, &conn->chan_l, list) { - if (chan->ident != ident) - continue; + /* Verify that all channels are ready */ + __l2cap_chan_list_id(conn, id, l2cap_ecred_list_defer, &result); - /* Reset ident so only one response is sent */ - chan->ident = 0; + if (result > 0) + return; - /* Include all channels pending with the same ident */ - pdu.dcid[i++] = cpu_to_le16(chan->scid); - } + if (result < 0) + data.pdu.rsp.result = cpu_to_le16(L2CAP_CR_LE_AUTHORIZATION); - mutex_unlock(&conn->chan_lock); + /* Build response */ + __l2cap_chan_list_id(conn, id, l2cap_ecred_rsp_defer, &data); - l2cap_send_cmd(conn, ident, L2CAP_ECRED_CONN_RSP, - sizeof(pdu.rsp) + i * sizeof(__le16), &pdu); + l2cap_send_cmd(conn, id, L2CAP_ECRED_CONN_RSP, + sizeof(data.pdu.rsp) + (data.count * sizeof(__le16)), + &data.pdu); } void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) @@ -6078,6 +6118,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, __set_chan_timer(chan, chan->ops->get_sndtimeo(chan)); chan->ident = cmd->ident; + chan->mode = L2CAP_MODE_EXT_FLOWCTL; if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { l2cap_state_change(chan, BT_CONNECT2); -- cgit From 1e9ac114c4428fdb7ff4635b45d4f46017e8916f Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 9 Mar 2023 16:07:39 +0800 Subject: Bluetooth: btsdio: fix use after free bug in btsdio_remove due to unfinished work In btsdio_probe, &data->work was bound with btsdio_work.In btsdio_send_frame, it was started by schedule_work. If we call btsdio_remove with an unfinished job, there may be a race condition and cause UAF bug on hdev. Fixes: ddbaf13e3609 ("[Bluetooth] Add generic driver for Bluetooth SDIO devices") Signed-off-by: Zheng Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btsdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 795be33f2892..02893600db39 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -354,6 +354,7 @@ static void btsdio_remove(struct sdio_func *func) BT_DBG("func %p", func); + cancel_work_sync(&data->work); if (!data) return; -- cgit From 1a0291f81529e8044fb29845a0196ba47af894ce Mon Sep 17 00:00:00 2001 From: Howard Chung Date: Thu, 16 Mar 2023 18:11:38 +0800 Subject: Bluetooth: mgmt: Fix MGMT add advmon with RSSI command The MGMT command: MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI uses variable length argument. This causes host not able to register advmon with rssi. This patch has been locally tested by adding monitor with rssi via btmgmt on a kernel 6.1 machine. Reviewed-by: Archie Pusaka Fixes: b338d91703fa ("Bluetooth: Implement support for Mesh") Signed-off-by: Howard Chung Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 39589f864ea7..249dc6777fb4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9357,7 +9357,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, HCI_MGMT_VAR_LEN }, { add_adv_patterns_monitor_rssi, - MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE }, + MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE, + HCI_MGMT_VAR_LEN }, { set_mesh, MGMT_SET_MESH_RECEIVER_SIZE, HCI_MGMT_VAR_LEN }, { mesh_features, MGMT_MESH_READ_FEATURES_SIZE }, -- cgit From bce56405201111807cc8e4f47c6de3e10b17c1ac Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Mon, 20 Mar 2023 21:50:18 -0400 Subject: Bluetooth: HCI: Fix global-out-of-bounds To loop a variable-length array, hci_init_stage_sync(stage) considers that stage[i] is valid as long as stage[i-1].func is valid. Thus, the last element of stage[].func should be intentionally invalid as hci_init0[], le_init2[], and others did. However, amp_init1[] and amp_init2[] have no invalid element, letting hci_init_stage_sync() keep accessing amp_init1[] over its valid range. This patch fixes this by adding {} in the last of amp_init1[] and amp_init2[]. ================================================================== BUG: KASAN: global-out-of-bounds in hci_dev_open_sync ( /v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) Read of size 8 at addr ffffffffaed1ab70 by task kworker/u5:0/1032 CPU: 0 PID: 1032 Comm: kworker/u5:0 Not tainted 6.2.0 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04 Workqueue: hci1 hci_power_on Call Trace: dump_stack_lvl (/v6.2-bzimage/lib/dump_stack.c:107 (discriminator 1)) print_report (/v6.2-bzimage/mm/kasan/report.c:307 /v6.2-bzimage/mm/kasan/report.c:417) ? hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) kasan_report (/v6.2-bzimage/mm/kasan/report.c:184 /v6.2-bzimage/mm/kasan/report.c:519) ? hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:3154 /v6.2-bzimage/net/bluetooth/hci_sync.c:3343 /v6.2-bzimage/net/bluetooth/hci_sync.c:4418 /v6.2-bzimage/net/bluetooth/hci_sync.c:4609 /v6.2-bzimage/net/bluetooth/hci_sync.c:4689) ? __pfx_hci_dev_open_sync (/v6.2-bzimage/net/bluetooth/hci_sync.c:4635) ? mutex_lock (/v6.2-bzimage/./arch/x86/include/asm/atomic64_64.h:190 /v6.2-bzimage/./include/linux/atomic/atomic-long.h:443 /v6.2-bzimage/./include/linux/atomic/atomic-instrumented.h:1781 /v6.2-bzimage/kernel/locking/mutex.c:171 /v6.2-bzimage/kernel/locking/mutex.c:285) ? __pfx_mutex_lock (/v6.2-bzimage/kernel/locking/mutex.c:282) hci_power_on (/v6.2-bzimage/net/bluetooth/hci_core.c:485 /v6.2-bzimage/net/bluetooth/hci_core.c:984) ? __pfx_hci_power_on (/v6.2-bzimage/net/bluetooth/hci_core.c:969) ? read_word_at_a_time (/v6.2-bzimage/./include/asm-generic/rwonce.h:85) ? strscpy (/v6.2-bzimage/./arch/x86/include/asm/word-at-a-time.h:62 /v6.2-bzimage/lib/string.c:161) process_one_work (/v6.2-bzimage/kernel/workqueue.c:2294) worker_thread (/v6.2-bzimage/./include/linux/list.h:292 /v6.2-bzimage/kernel/workqueue.c:2437) ? __pfx_worker_thread (/v6.2-bzimage/kernel/workqueue.c:2379) kthread (/v6.2-bzimage/kernel/kthread.c:376) ? __pfx_kthread (/v6.2-bzimage/kernel/kthread.c:331) ret_from_fork (/v6.2-bzimage/arch/x86/entry/entry_64.S:314) The buggy address belongs to the variable: amp_init1+0x30/0x60 The buggy address belongs to the physical page: page:000000003a157ec6 refcount:1 mapcount:0 mapping:0000000000000000 ia flags: 0x200000000001000(reserved|node=0|zone=2) raw: 0200000000001000 ffffea0005054688 ffffea0005054688 000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffffffffaed1aa00: f9 f9 f9 f9 00 00 00 00 f9 f9 f9 f9 00 00 00 00 ffffffffaed1aa80: 00 00 00 00 f9 f9 f9 f9 00 00 00 00 00 00 00 00 >ffffffffaed1ab00: 00 f9 f9 f9 f9 f9 f9 f9 00 00 00 00 00 00 f9 f9 ^ ffffffffaed1ab80: f9 f9 f9 f9 00 00 00 00 f9 f9 f9 f9 00 00 00 f9 ffffffffaed1ac00: f9 f9 f9 f9 00 06 f9 f9 f9 f9 f9 f9 00 00 02 f9 This bug is found by FuzzBT, a modified version of Syzkaller. Other contributors for this bug are Ruoyu Wu and Peng Hui. Fixes: d0b137062b2d ("Bluetooth: hci_sync: Rework init stages") Signed-off-by: Sungwoo Kim Reviewed-by: Simon Horman Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 5b8dc8fb2e27..5a6aa1627791 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3360,6 +3360,7 @@ static const struct hci_init_stage amp_init1[] = { HCI_INIT(hci_read_flow_control_mode_sync), /* HCI_OP_READ_LOCATION_DATA */ HCI_INIT(hci_read_location_data_sync), + {} }; static int hci_init1_sync(struct hci_dev *hdev) @@ -3394,6 +3395,7 @@ static int hci_init1_sync(struct hci_dev *hdev) static const struct hci_init_stage amp_init2[] = { /* HCI_OP_READ_LOCAL_FEATURES */ HCI_INIT(hci_read_local_features_sync), + {} }; /* Read Buffer Size (ACL mtu, max pkt, etc.) */ -- cgit From e9c3cda4d86e56bf7fe403729f38c4f0f65d3860 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 6 Mar 2023 09:15:17 +0100 Subject: mm, vmalloc: fix high order __GFP_NOFAIL allocations Gao Xiang has reported that the page allocator complains about high order __GFP_NOFAIL request coming from the vmalloc core: __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5549 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2286 vm_area_alloc_pages mm/vmalloc.c:2989 [inline] __vmalloc_area_node mm/vmalloc.c:3057 [inline] __vmalloc_node_range+0x978/0x13c0 mm/vmalloc.c:3227 kvmalloc_node+0x156/0x1a0 mm/util.c:606 kvmalloc include/linux/slab.h:737 [inline] kvmalloc_array include/linux/slab.h:755 [inline] kvcalloc include/linux/slab.h:760 [inline] it seems that I have completely missed high order allocation backing vmalloc areas case when implementing __GFP_NOFAIL support. This means that [k]vmalloc at al. can allocate higher order allocations with __GFP_NOFAIL which can trigger OOM killer for non-costly orders easily or cause a lot of reclaim/compaction activity if those requests cannot be satisfied. Fix the issue by falling back to zero order allocations for __GFP_NOFAIL requests if the high order request fails. Link: https://lkml.kernel.org/r/ZAXynvdNqcI0f6Us@dhcp22.suse.cz Fixes: 9376130c390a ("mm/vmalloc: add support for __GFP_NOFAIL") Reported-by: Gao Xiang Link: https://lkml.kernel.org/r/20230305053035.1911-1-hsiangkao@linux.alibaba.com Signed-off-by: Michal Hocko Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Vlastimil Babka Cc: Baoquan He Cc: Christoph Hellwig Cc: Mel Gorman Signed-off-by: Andrew Morton --- mm/vmalloc.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ef910bf349e1..bef6cf2b4d46 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2883,6 +2883,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, unsigned int order, unsigned int nr_pages, struct page **pages) { unsigned int nr_allocated = 0; + gfp_t alloc_gfp = gfp; + bool nofail = false; struct page *page; int i; @@ -2893,6 +2895,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, * more permissive. */ if (!order) { + /* bulk allocator doesn't support nofail req. officially */ gfp_t bulk_gfp = gfp & ~__GFP_NOFAIL; while (nr_allocated < nr_pages) { @@ -2931,20 +2934,35 @@ vm_area_alloc_pages(gfp_t gfp, int nid, if (nr != nr_pages_request) break; } + } else if (gfp & __GFP_NOFAIL) { + /* + * Higher order nofail allocations are really expensive and + * potentially dangerous (pre-mature OOM, disruptive reclaim + * and compaction etc. + */ + alloc_gfp &= ~__GFP_NOFAIL; + nofail = true; } /* High-order pages or fallback path if "bulk" fails. */ - while (nr_allocated < nr_pages) { if (fatal_signal_pending(current)) break; if (nid == NUMA_NO_NODE) - page = alloc_pages(gfp, order); + page = alloc_pages(alloc_gfp, order); else - page = alloc_pages_node(nid, gfp, order); - if (unlikely(!page)) - break; + page = alloc_pages_node(nid, alloc_gfp, order); + if (unlikely(!page)) { + if (!nofail) + break; + + /* fall back to the zero order allocations */ + alloc_gfp |= __GFP_NOFAIL; + order = 0; + continue; + } + /* * Higher order allocations must be able to be treated as * indepdenent small pages by callers (as they can with -- cgit From 0fa99fdfe1b38da396d0b2d1496a823bcd0ebea0 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 7 Mar 2023 13:02:46 -0500 Subject: maple_tree: fix mas_skip_node() end slot detection Patch series "Fix mas_skip_node() for mas_empty_area()", v2. mas_empty_area() was incorrectly returning an error when there was room. The issue was tracked down to mas_skip_node() using the incorrect end-of-slot count. Instead of using the nodes hard limit, the limit of data should be used. mas_skip_node() was also setting the min and max to that of the child node, which was unnecessary. Within these limits being set, there was also a bug that corrupted the maple state's max if the offset was set to the maximum node pivot. The bug was without consequence unless there was a sufficient gap in the next child node which would cause an error to be returned. This patch set fixes these errors by removing the limit setting from mas_skip_node() and uses the mas_data_end() for slot limits, and adds tests for all failures discovered. This patch (of 2): mas_skip_node() is used to move the maple state to the node with a higher limit. It does this by walking up the tree and increasing the slot count. Since slot count may not be able to be increased, it may need to walk up multiple times to find room to walk right to a higher limit node. The limit of slots that was being used was the node limit and not the last location of data in the node. This would cause the maple state to be shifted outside actual data and enter an error state, thus returning -EBUSY. The result of the incorrect error state means that mas_awalk() would return an error instead of finding the allocation space. The fix is to use mas_data_end() in mas_skip_node() to detect the nodes data end point and continue walking the tree up until it is safe to move to a node with a higher limit. The walk up the tree also sets the maple state limits so remove the buggy code from mas_skip_node(). Setting the limits had the unfortunate side effect of triggering another bug if the parent node was full and the there was no suitable gap in the second last child, but room in the next child. mas_skip_node() may also be passed a maple state in an error state from mas_anode_descend() when no allocations are available. Return on such an error state immediately. Link: https://lkml.kernel.org/r/20230307180247.2220303-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230307180247.2220303-2-Liam.Howlett@oracle.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Reported-by: Snild Dolkow Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ Tested-by: Snild Dolkow Cc: Peng Zhang Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 646297cae5d1..9e2735cbc2b4 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5099,35 +5099,21 @@ static inline bool mas_rewind_node(struct ma_state *mas) */ static inline bool mas_skip_node(struct ma_state *mas) { - unsigned char slot, slot_count; - unsigned long *pivots; - enum maple_type mt; + if (mas_is_err(mas)) + return false; - mt = mte_node_type(mas->node); - slot_count = mt_slots[mt] - 1; do { if (mte_is_root(mas->node)) { - slot = mas->offset; - if (slot > slot_count) { + if (mas->offset >= mas_data_end(mas)) { mas_set_err(mas, -EBUSY); return false; } } else { mas_ascend(mas); - slot = mas->offset; - mt = mte_node_type(mas->node); - slot_count = mt_slots[mt] - 1; } - } while (slot > slot_count); - - mas->offset = ++slot; - pivots = ma_pivots(mas_mn(mas), mt); - if (slot > 0) - mas->min = pivots[slot - 1] + 1; - - if (slot <= slot_count) - mas->max = pivots[slot]; + } while (mas->offset >= mas_data_end(mas)); + mas->offset++; return true; } -- cgit From 4bd6dded6318dc8e2514d74868c1f8fb38b61a60 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Tue, 7 Mar 2023 13:02:47 -0500 Subject: test_maple_tree: add more testing for mas_empty_area() Test robust filling of an entire area of the tree, then test one beyond. This is to test the walking back up the tree at the end of nodes and error condition. Test inspired by the reproducer code provided by Snild Dolkow. The last test in the function tests for the case of a corrupted maple state caused by the incorrect limits set during mas_skip_node(). There needs to be a gap in the second last child and last child, but the search must rule out the second last child's gap. This would avoid correcting the maple state to the correct max limit and return an error. Link: https://lkml.kernel.org/r/20230307180247.2220303-3-Liam.Howlett@oracle.com Cc: Snild Dolkow Link: https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ Fixes: e15e06a83923 ("lib/test_maple_tree: add testing for maple tree") Signed-off-by: Liam R. Howlett Cc: Peng Zhang Cc: Signed-off-by: Andrew Morton --- lib/test_maple_tree.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 3d19b1f78d71..f1db333270e9 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2670,6 +2670,49 @@ static noinline void check_empty_area_window(struct maple_tree *mt) rcu_read_unlock(); } +static noinline void check_empty_area_fill(struct maple_tree *mt) +{ + const unsigned long max = 0x25D78000; + unsigned long size; + int loop, shift; + MA_STATE(mas, mt, 0, 0); + + mt_set_non_kernel(99999); + for (shift = 12; shift <= 16; shift++) { + loop = 5000; + size = 1 << shift; + while (loop--) { + mas_set(&mas, 0); + mas_lock(&mas); + MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != 0); + MT_BUG_ON(mt, mas.last != mas.index + size - 1); + mas_store_gfp(&mas, (void *)size, GFP_KERNEL); + mas_unlock(&mas); + mas_reset(&mas); + } + } + + /* No space left. */ + size = 0x1000; + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area(&mas, 0, max, size) != -EBUSY); + rcu_read_unlock(); + + /* Fill a depth 3 node to the maximum */ + for (unsigned long i = 629440511; i <= 629440800; i += 6) + mtree_store_range(mt, i, i + 5, (void *)i, GFP_KERNEL); + /* Make space in the second-last depth 4 node */ + mtree_erase(mt, 631668735); + /* Make space in the last depth 4 node */ + mtree_erase(mt, 629506047); + mas_reset(&mas); + /* Search from just after the gap in the second-last depth 4 */ + rcu_read_lock(); + MT_BUG_ON(mt, mas_empty_area(&mas, 629506048, 690000000, 0x5000) != 0); + rcu_read_unlock(); + mt_set_non_kernel(0); +} + static DEFINE_MTREE(tree); static int maple_tree_seed(void) { @@ -2926,6 +2969,11 @@ static int maple_tree_seed(void) check_empty_area_window(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_empty_area_fill(&tree); + mtree_destroy(&tree); + + #if defined(BENCH) skip: #endif -- cgit From 003587000276f81d0114b5ce773d80c119d8cb30 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 7 Mar 2023 17:55:48 +0900 Subject: nilfs2: fix kernel-infoleak in nilfs_ioctl_wrap_copy() The ioctl helper function nilfs_ioctl_wrap_copy(), which exchanges a metadata array to/from user space, may copy uninitialized buffer regions to user space memory for read-only ioctl commands NILFS_IOCTL_GET_SUINFO and NILFS_IOCTL_GET_CPINFO. This can occur when the element size of the user space metadata given by the v_size member of the argument nilfs_argv structure is larger than the size of the metadata element (nilfs_suinfo structure or nilfs_cpinfo structure) on the file system side. KMSAN-enabled kernels detect this issue as follows: BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xc0/0x100 lib/usercopy.c:33 instrument_copy_to_user include/linux/instrumented.h:121 [inline] _copy_to_user+0xc0/0x100 lib/usercopy.c:33 copy_to_user include/linux/uaccess.h:169 [inline] nilfs_ioctl_wrap_copy+0x6fa/0xc10 fs/nilfs2/ioctl.c:99 nilfs_ioctl_get_info fs/nilfs2/ioctl.c:1173 [inline] nilfs_ioctl+0x2402/0x4450 fs/nilfs2/ioctl.c:1290 nilfs_compat_ioctl+0x1b8/0x200 fs/nilfs2/ioctl.c:1343 __do_compat_sys_ioctl fs/ioctl.c:968 [inline] __se_compat_sys_ioctl+0x7dd/0x1000 fs/ioctl.c:910 __ia32_compat_sys_ioctl+0x93/0xd0 fs/ioctl.c:910 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246 entry_SYSENTER_compat_after_hwframe+0x70/0x82 Uninit was created at: __alloc_pages+0x9f6/0xe90 mm/page_alloc.c:5572 alloc_pages+0xab0/0xd80 mm/mempolicy.c:2287 __get_free_pages+0x34/0xc0 mm/page_alloc.c:5599 nilfs_ioctl_wrap_copy+0x223/0xc10 fs/nilfs2/ioctl.c:74 nilfs_ioctl_get_info fs/nilfs2/ioctl.c:1173 [inline] nilfs_ioctl+0x2402/0x4450 fs/nilfs2/ioctl.c:1290 nilfs_compat_ioctl+0x1b8/0x200 fs/nilfs2/ioctl.c:1343 __do_compat_sys_ioctl fs/ioctl.c:968 [inline] __se_compat_sys_ioctl+0x7dd/0x1000 fs/ioctl.c:910 __ia32_compat_sys_ioctl+0x93/0xd0 fs/ioctl.c:910 do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246 entry_SYSENTER_compat_after_hwframe+0x70/0x82 Bytes 16-127 of 3968 are uninitialized ... This eliminates the leak issue by initializing the page allocated as buffer using get_zeroed_page(). Link: https://lkml.kernel.org/r/20230307085548.6290-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+132fdd2f1e1805fdc591@syzkaller.appspotmail.com Link: https://lkml.kernel.org/r/000000000000a5bd2d05f63f04ae@google.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 5ccc638ae92f..1dfbc0c34513 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -71,7 +71,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_index > ~(__u64)0 - argv->v_nmembs) return -EINVAL; - buf = (void *)__get_free_pages(GFP_NOFS, 0); + buf = (void *)get_zeroed_page(GFP_NOFS); if (unlikely(!buf)) return -ENOMEM; maxmembs = PAGE_SIZE / argv->v_size; -- cgit From 12871a154690c52e2ded718b392a3977c114f6c1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 7 Mar 2023 15:59:00 +0800 Subject: checksyscalls: ignore fstat to silence build warning on LoongArch fstat is replaced by statx on the new architecture, so an exception is added to the checksyscalls script to silence the following build warning on LoongArch: CALL scripts/checksyscalls.sh :569:2: warning: #warning syscall fstat not implemented [-Wcpp] Link: https://lkml.kernel.org/r/1678175940-20872-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Suggested-by: WANG Xuerui Suggested-by: Arnd Bergmann Reviewed-by: Arnd Bergmann Cc: Huacai Chen Cc: Masahiro Yamada Signed-off-by: Andrew Morton --- scripts/checksyscalls.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh index f33e61aca93d..1e5d2eeb726d 100755 --- a/scripts/checksyscalls.sh +++ b/scripts/checksyscalls.sh @@ -114,7 +114,6 @@ cat << EOF #define __IGNORE_truncate #define __IGNORE_stat #define __IGNORE_lstat -#define __IGNORE_fstat #define __IGNORE_fcntl #define __IGNORE_fadvise64 #define __IGNORE_newfstatat @@ -255,6 +254,9 @@ cat << EOF /* 64-bit ports never needed these, and new 32-bit ports can use statx */ #define __IGNORE_fstat64 #define __IGNORE_fstatat64 + +/* Newer ports are not required to provide fstat in favor of statx */ +#define __IGNORE_fstat EOF } -- cgit From 6bbf1090672673183a98cd6e19de91fa5a319df0 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 8 Mar 2023 19:04:20 +0000 Subject: mm: deduplicate error handling for map_deny_write_exec Patch series "Fixes for MDWE prctl" These are four small fixes for the recent memory-write-deny-execute prctl patches [1]. Two reported by Alexey about error handling and two tooling fixes by Peter. This patch (of 4): Commit cc8d1b097de7 ("mmap: clean up mmap_region() unrolling") deduplicated the error handling, do the same for the return value of `map_deny_write_exec`. Link: https://lkml.kernel.org/r/20230308190423.46491-1-joey.gouly@arm.com Link: https://lkml.kernel.org/r/20230308190423.46491-2-joey.gouly@arm.com Link: https://lore.kernel.org/linux-arm-kernel/20230119160344.54358-1-joey.gouly@arm.com/ [1] Fixes: b507808ebce2 ("mm: implement memory-deny-write-execute as a prctl") Signed-off-by: Joey Gouly Reported-by: Alexey Izbyshev Link: https://lore.kernel.org/linux-arm-kernel/8408d8901e9d7ee6b78db4c6cba04b78@ispras.ru/ Reviewed-by: Catalin Marinas Cc: Arnaldo Carvalho de Melo Cc: Kees Cook Cc: nd Cc: Peter Xu Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/mmap.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 740b54be3ed4..ad499f7b767f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2621,12 +2621,7 @@ cannot_expand: if (map_deny_write_exec(vma, vma->vm_flags)) { error = -EACCES; - if (file) - goto close_and_free_vma; - else if (vma->vm_file) - goto unmap_and_free_vma; - else - goto free_vma; + goto close_and_free_vma; } /* Allow architectures to sanity-check the vm_flags */ -- cgit From 3d27a95b1d96757e48ce970baa3d419af299c2af Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 8 Mar 2023 19:04:21 +0000 Subject: mm: fix error handling for map_deny_write_exec Commit 4a18419f71cd ("mm/mprotect: use mmu_gather") changed 'goto out;' to 'break' in the loop. This wasn't noticed while rebasing the MDWE patches, so fix it now. Link: https://lkml.kernel.org/r/20230308190423.46491-3-joey.gouly@arm.com Fixes: b507808ebce2 ("mm: implement memory-deny-write-execute as a prctl") Signed-off-by: Joey Gouly Reported-by: Alexey Izbyshev Link: https://lore.kernel.org/linux-arm-kernel/8408d8901e9d7ee6b78db4c6cba04b78@ispras.ru/ Reviewed-by: Catalin Marinas Cc: Arnaldo Carvalho de Melo Cc: Kees Cook Cc: nd Cc: Peter Xu Cc: Shuah Khan Signed-off-by: Andrew Morton --- mm/mprotect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 231929f119d9..13e84d8c0797 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -805,7 +805,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, if (map_deny_write_exec(vma, newflags)) { error = -EACCES; - goto out; + break; } /* Allow architectures to sanity-check the new flags */ -- cgit From d035230ec9937a9138921d2a0eeb99496ea7eac0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 8 Mar 2023 19:04:22 +0000 Subject: kselftest: vm: fix unused variable warning Remove unused variable from the MDWE test. [joey.gouly@arm.com: add commit message] Link: https://lkml.kernel.org/r/20230308190423.46491-4-joey.gouly@arm.com Fixes: 4cf1fe34fd18 ("kselftest: vm: add tests for memory-deny-write-execute") Signed-off-by: Peter Xu Signed-off-by: Joey Gouly Acked-by: Catalin Marinas Cc: Alexey Izbyshev Cc: Arnaldo Carvalho de Melo Cc: Kees Cook Cc: nd Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mdwe_test.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c index f466a099f1bf..bc91bef5d254 100644 --- a/tools/testing/selftests/mm/mdwe_test.c +++ b/tools/testing/selftests/mm/mdwe_test.c @@ -163,9 +163,8 @@ TEST_F(mdwe, mprotect_WRITE_EXEC) TEST_F(mdwe, mmap_FIXED) { - void *p, *p2; + void *p; - p2 = mmap(NULL, self->size, PROT_READ | PROT_EXEC, self->flags, 0, 0); self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0); ASSERT_NE(self->p, MAP_FAILED); -- cgit From 6db504ce55bdbc575723938fc480713c9183f6a2 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 8 Mar 2023 17:03:10 -0500 Subject: mm/ksm: fix race with VMA iteration and mm_struct teardown exit_mmap() will tear down the VMAs and maple tree with the mmap_lock held in write mode. Ensure that the maple tree is still valid by checking ksm_test_exit() after taking the mmap_lock in read mode, but before the for_each_vma() iterator dereferences a destroyed maple tree. Since the maple tree is destroyed, the flags telling lockdep to check an external lock has been cleared. Skip the for_each_vma() iterator to avoid dereferencing a maple tree without the external lock flag, which would create a lockdep warning. Link: https://lkml.kernel.org/r/20230308220310.3119196-1-Liam.Howlett@oracle.com Fixes: a5f18ba07276 ("mm/ksm: use vma iterators instead of vma linked list") Signed-off-by: Liam R. Howlett Reported-by: Pengfei Xu Link: https://lore.kernel.org/lkml/ZAdUUhSbaa6fHS36@xpf.sh.intel.com/ Reported-by: syzbot+2ee18845e89ae76342c5@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=64a3e95957cd3deab99df7cd7b5a9475af92c93e Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Cc: Signed-off-by: Andrew Morton --- mm/ksm.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index ad591b779d53..2b8d30068cbb 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -988,9 +988,15 @@ static int unmerge_and_remove_all_rmap_items(void) mm = mm_slot->slot.mm; mmap_read_lock(mm); + + /* + * Exit right away if mm is exiting to avoid lockdep issue in + * the maple tree + */ + if (ksm_test_exit(mm)) + goto mm_exiting; + for_each_vma(vmi, vma) { - if (ksm_test_exit(mm)) - break; if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) continue; err = unmerge_ksm_pages(vma, @@ -999,6 +1005,7 @@ static int unmerge_and_remove_all_rmap_items(void) goto error; } +mm_exiting: remove_trailing_rmap_items(&mm_slot->rmap_list); mmap_read_unlock(mm); -- cgit From 90db9dbedd26ce029f3a0f8d2cbd3a142f452408 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 27 Feb 2023 10:47:27 +0100 Subject: kasan, powerpc: don't rename memintrinsics if compiler adds prefixes With appropriate compiler support [1], KASAN builds use __asan prefixed meminstrinsics, and KASAN no longer overrides memcpy/memset/memmove. If compiler support is detected (CC_HAS_KASAN_MEMINTRINSIC_PREFIX), define memintrinsics normally (do not prefix '__'). On powerpc, KASAN is the only user of __mem functions, which are used to define instrumented memintrinsics. Alias the normal versions for KASAN to use in its implementation. Link: https://lore.kernel.org/all/20230224085942.1791837-1-elver@google.com/ [1] Link: https://lore.kernel.org/oe-kbuild-all/202302271348.U5lvmo0S-lkp@intel.com/ Link: https://lkml.kernel.org/r/20230227094726.3833247-1-elver@google.com Signed-off-by: Marco Elver Reported-by: kernel test robot Acked-by: Michael Ellerman [powerpc] Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Christophe Leroy Cc: Daniel Axtens Cc: Dmitry Vyukov Cc: Liam R. Howlett Cc: Nicholas Piggin Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- arch/powerpc/include/asm/kasan.h | 2 +- arch/powerpc/include/asm/string.h | 15 +++++++++++---- arch/powerpc/kernel/prom_init_check.sh | 9 +++++++-- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 92a968202ba7..365d2720097c 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX) #define _GLOBAL_KASAN(fn) _GLOBAL(__##fn) #define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn) #define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn) diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 2aa0e31e6884..60ba22770f51 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -30,11 +30,17 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); void memcpy_flushcache(void *dest, const void *src, size_t size); +#ifdef CONFIG_KASAN +/* __mem variants are used by KASAN to implement instrumented meminstrinsics. */ +#ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX +#define __memset memset +#define __memcpy memcpy +#define __memmove memmove +#else /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */ void *__memset(void *s, int c, __kernel_size_t count); void *__memcpy(void *to, const void *from, __kernel_size_t n); void *__memmove(void *to, const void *from, __kernel_size_t n); - -#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +#ifndef __SANITIZE_ADDRESS__ /* * For files that are not instrumented (e.g. mm/slub.c) we * should use not instrumented version of mem* functions. @@ -46,8 +52,9 @@ void *__memmove(void *to, const void *from, __kernel_size_t n); #ifndef __NO_FORTIFY #define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ #endif - -#endif +#endif /* !__SANITIZE_ADDRESS__ */ +#endif /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */ +#endif /* CONFIG_KASAN */ #ifdef CONFIG_PPC64 #ifndef CONFIG_KASAN diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 5a319863f289..69623b9045d5 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,8 +13,13 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null -if [ $? -eq 0 ] +has_renamed_memintrinsics() +{ + grep -q "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} && \ + ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" ${KCONFIG_CONFIG} +} + +if has_renamed_memintrinsics then MEM_FUNCS="__memcpy __memset" else -- cgit From cc2a978d28422ca957d14cb25caa72e25c45be1d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 10 Mar 2023 13:35:08 +0100 Subject: mailmap: add entry for Tobias Klauser Map my old email addresses to the current address. Link: https://lkml.kernel.org/r/20230310123508.22079-1-tklauser@distanz.ch Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton --- .mailmap | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index 317e51a0065c..96e9acaed89f 100644 --- a/.mailmap +++ b/.mailmap @@ -437,6 +437,10 @@ Thomas Graf Thomas Körper Thomas Pedersen Tiezhu Yang +Tobias Klauser +Tobias Klauser +Tobias Klauser +Tobias Klauser Todor Tomov Tony Luck TripleX Chung -- cgit From f446883d12b8bfa486f7c98d403054d61d38c989 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 9 Mar 2023 20:29:13 -0800 Subject: Revert "kasan: drop skip_kasan_poison variable in free_pages_prepare" This reverts commit 487a32ec24be819e747af8c2ab0d5c515508086a. should_skip_kasan_poison() reads the PG_skip_kasan_poison flag from page->flags. However, this line of code in free_pages_prepare(): page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; clears most of page->flags, including PG_skip_kasan_poison, before calling should_skip_kasan_poison(), which meant that it would never return true as a result of the page flag being set. Therefore, fix the code to call should_skip_kasan_poison() before clearing the flags, as we were doing before the reverted patch. This fixes a measurable performance regression introduced in the reverted commit, where munmap() takes longer than intended if HW tags KASAN is supported and enabled at runtime. Without this patch, we see a single-digit percentage performance regression in a particular mmap()-heavy benchmark when enabling HW tags KASAN, and with the patch, there is no statistically significant performance impact when enabling HW tags KASAN. Link: https://lkml.kernel.org/r/20230310042914.3805818-2-pcc@google.com Fixes: 487a32ec24be ("kasan: drop skip_kasan_poison variable in free_pages_prepare") Link: https://linux-review.googlesource.com/id/Ic4f13affeebd20548758438bb9ed9ca40e312b79 Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Catalin Marinas [arm64] Cc: Evgenii Stepanov Cc: Vincenzo Frascino Cc: Will Deacon Cc: [6.1] Signed-off-by: Andrew Morton --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ac1fc986af44..7136c36c5d01 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1398,6 +1398,7 @@ static __always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free, fpi_t fpi_flags) { int bad = 0; + bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags); bool init = want_init_on_free(); VM_BUG_ON_PAGE(PageTail(page), page); @@ -1470,7 +1471,7 @@ static __always_inline bool free_pages_prepare(struct page *page, * With hardware tag-based KASAN, memory tags must be set before the * page becomes unavailable via debug_pagealloc or arch_free_page. */ - if (!should_skip_kasan_poison(page, fpi_flags)) { + if (!skip_kasan_poison) { kasan_poison_pages(page, order, init); /* Memory is already initialized if KASAN did it internally. */ -- cgit From 83bd3eeb326bb0b5800573622d18e1c5e4355323 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 13 Mar 2023 10:03:43 +0100 Subject: mailmap: map Rajendra Nayak's old address to his current one Rajendra's old email is still picked up by the likes of get_maintainer.pl and keeps bouncing like all other @codeaurora.org addresses. Map it to his current one. Link: https://lkml.kernel.org/r/20230313090343.2148346-1-konrad.dybcio@linaro.org Signed-off-by: Konrad Dybcio Cc: Rajendra Nayak Cc: Andy Gross Cc: Baolin Wang Cc: Bjorn Andersson Cc: Colin Ian King Cc: Jakub Kicinski Cc: Kirill Tkhai Cc: Marijn Suijten Cc: Qais Yousef Cc: Stephen Hemminger Cc: Vasily Averin Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 96e9acaed89f..bc213f9eaf6f 100644 --- a/.mailmap +++ b/.mailmap @@ -379,6 +379,7 @@ Quentin Monnet Quentin Perret Rafael J. Wysocki Rajeev Nandan +Rajendra Nayak Rajesh Shah Ralf Baechle Ralf Wildenhues -- cgit From 9e26240c3bc1cf4895abdd5330a35465b5050109 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 14 Mar 2023 13:56:03 +0100 Subject: mailmap: map Sai Prakash Ranjan's old address to his current one Sai's old email is still picked up by the likes of get_maintainer.pl and keeps bouncing like all other @codeaurora.org addresses. Map it to his current one. Link: https://lkml.kernel.org/r/20230314125604.2734146-1-konrad.dybcio@linaro.org Signed-off-by: Konrad Dybcio Cc: Sai Prakash Ranjan Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index bc213f9eaf6f..3c7f00496072 100644 --- a/.mailmap +++ b/.mailmap @@ -398,6 +398,7 @@ Ross Zwisler Rudolf Marek Rui Saraiva Sachin P Sant +Sai Prakash Ranjan Sakari Ailus Sam Ravnborg Sankeerth Billakanti -- cgit From d2e44a50ecebb72257e5deb7ce8d227d7e3f53b9 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Tue, 14 Mar 2023 12:54:55 +0100 Subject: mailmap: add entry for Enric Balletbo i Serra Map Enric's old corporate addresses to his kernel.org address. Link: https://lkml.kernel.org/r/20230314115455.188818-1-eballetbo@kernel.org Signed-off-by: Enric Balletbo i Serra Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 3c7f00496072..cebd939c3a91 100644 --- a/.mailmap +++ b/.mailmap @@ -133,6 +133,8 @@ Dmitry Safonov <0x7f454c46@gmail.com> Domen Puncer Douglas Gilbert Ed L. Cashin +Enric Balletbo i Serra +Enric Balletbo i Serra Erik Kaneda Eugen Hristev Evgeniy Polyakov -- cgit From 13684e966d46283e0e89b6a4941596dc52b18bf3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Mar 2023 15:28:17 +0100 Subject: lib: dhry: fix unstable smp_processor_id(_) usage When running the in-kernel Dhrystone benchmark with CONFIG_DEBUG_PREEMPT=y: BUG: using smp_processor_id() in preemptible [00000000] code: bash/938 Fix this by not using smp_processor_id() directly, but instead wrapping the whole benchmark inside a get_cpu()/put_cpu() pair. This makes sure the whole benchmark is run on the same CPU core, and the reported values are consistent. Link: https://lkml.kernel.org/r/b0d29932bb24ad82cea7f821e295c898e9657be0.1678890070.git.geert+renesas@glider.be Fixes: d5528cc16893f1f6 ("lib: add Dhrystone benchmark test") Signed-off-by: Geert Uytterhoeven Reported-by: Tobias Klausmann Link: https://bugzilla.kernel.org/show_bug.cgi?id=217179 Signed-off-by: Andrew Morton --- lib/dhry_run.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/dhry_run.c b/lib/dhry_run.c index f9d33efa6d09..f15ac666e9d3 100644 --- a/lib/dhry_run.c +++ b/lib/dhry_run.c @@ -31,6 +31,7 @@ MODULE_PARM_DESC(iterations, static void dhry_benchmark(void) { + unsigned int cpu = get_cpu(); int i, n; if (iterations > 0) { @@ -45,9 +46,10 @@ static void dhry_benchmark(void) } report: + put_cpu(); if (n >= 0) - pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", - smp_processor_id(), n, n / DHRY_VAX); + pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", cpu, + n, n / DHRY_VAX); else if (n == -EAGAIN) pr_err("Please increase the number of iterations\n"); else -- cgit From 1c86a188e03156223a34d09ce290b49bd4dd0403 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 15 Mar 2023 11:44:41 +0800 Subject: mm: kfence: fix using kfence_metadata without initialization in show_object() The variable kfence_metadata is initialized in kfence_init_pool(), then, it is not initialized if kfence is disabled after booting. In this case, kfence_metadata will be used (e.g. ->lock and ->state fields) without initialization when reading /sys/kernel/debug/kfence/objects. There will be a warning if you enable CONFIG_DEBUG_SPINLOCK. Fix it by creating debugfs files when necessary. Link: https://lkml.kernel.org/r/20230315034441.44321-1-songmuchun@bytedance.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Muchun Song Tested-by: Marco Elver Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/kfence/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 5349c37a5dac..79c94ee55f97 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -726,10 +726,14 @@ static const struct seq_operations objects_sops = { }; DEFINE_SEQ_ATTRIBUTE(objects); -static int __init kfence_debugfs_init(void) +static int kfence_debugfs_init(void) { - struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL); + struct dentry *kfence_dir; + if (!READ_ONCE(kfence_enabled)) + return 0; + + kfence_dir = debugfs_create_dir("kfence", NULL); debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops); debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops); return 0; @@ -883,6 +887,8 @@ static int kfence_init_late(void) } kfence_init_enable(); + kfence_debugfs_init(); + return 0; } -- cgit From 2e08ca1802441224f5b7cc6bffbb687f7406de95 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 16 Mar 2023 23:47:04 +0100 Subject: kfence: avoid passing -g for test Nathan reported that when building with GNU as and a version of clang that defaults to DWARF5: $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- \ LLVM=1 LLVM_IAS=0 O=build \ mrproper allmodconfig mm/kfence/kfence_test.o /tmp/kfence_test-08a0a0.s: Assembler messages: /tmp/kfence_test-08a0a0.s:14627: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14628: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14632: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14633: Error: non-constant .uleb128 is not supported /tmp/kfence_test-08a0a0.s:14639: Error: non-constant .uleb128 is not supported ... This is because `-g` defaults to the compiler debug info default. If the assembler does not support some of the directives used, the above errors occur. To fix, remove the explicit passing of `-g`. All the test wants is that stack traces print valid function names, and debug info is not required for that. (I currently cannot recall why I added the explicit `-g`.) Link: https://lkml.kernel.org/r/20230316224705.709984-1-elver@google.com Fixes: bc8fbc5f305a ("kfence: add test suite") Signed-off-by: Marco Elver Reported-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton --- mm/kfence/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile index 0bb95728a784..2de2a58d11a1 100644 --- a/mm/kfence/Makefile +++ b/mm/kfence/Makefile @@ -2,5 +2,5 @@ obj-y := core.o report.o -CFLAGS_kfence_test.o := -g -fno-omit-frame-pointer -fno-optimize-sibling-calls +CFLAGS_kfence_test.o := -fno-omit-frame-pointer -fno-optimize-sibling-calls obj-$(CONFIG_KFENCE_KUNIT_TEST) += kfence_test.o -- cgit From 5eb39cde1e2487ba5ec1802dc5e58a77e700d99e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 16 Mar 2023 23:47:05 +0100 Subject: kcsan: avoid passing -g for test Nathan reported that when building with GNU as and a version of clang that defaults to DWARF5, the assembler will complain with: Error: non-constant .uleb128 is not supported This is because `-g` defaults to the compiler debug info default. If the assembler does not support some of the directives used, the above errors occur. To fix, remove the explicit passing of `-g`. All the test wants is that stack traces print valid function names, and debug info is not required for that. (I currently cannot recall why I added the explicit `-g`.) Link: https://lkml.kernel.org/r/20230316224705.709984-2-elver@google.com Fixes: 1fe84fd4a402 ("kcsan: Add test suite") Signed-off-by: Marco Elver Reported-by: Nathan Chancellor Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton --- kernel/kcsan/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile index 8cf70f068d92..a45f3dfc8d14 100644 --- a/kernel/kcsan/Makefile +++ b/kernel/kcsan/Makefile @@ -16,6 +16,6 @@ obj-y := core.o debugfs.o report.o KCSAN_INSTRUMENT_BARRIERS_selftest.o := y obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o -CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -g -fno-omit-frame-pointer +CFLAGS_kcsan_test.o := $(CFLAGS_KCSAN) -fno-omit-frame-pointer CFLAGS_kcsan_test.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_KCSAN_KUNIT_TEST) += kcsan_test.o -- cgit From 5aa360971beaadf51f099fb7904fa4807b7d39cd Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Thu, 16 Mar 2023 11:25:25 +0100 Subject: mailmap: add entries for Richard Leitner Map all my old email addresses to my current address. Link: https://lkml.kernel.org/r/20230316-my-mailmap-v1-1-76bc3a36ba41@linux.dev Signed-off-by: Richard Leitner Signed-off-by: Andrew Morton --- .mailmap | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.mailmap b/.mailmap index cebd939c3a91..e2af78f67f7c 100644 --- a/.mailmap +++ b/.mailmap @@ -390,6 +390,9 @@ Rémi Denis-Courmont Ricardo Ribalda Ricardo Ribalda Ricardo Ribalda Delgado Ricardo Ribalda +Richard Leitner +Richard Leitner +Richard Leitner Robert Foss Roman Gushchin Roman Gushchin -- cgit From d0072ca529674c36421023ffe90837a7de9387f3 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 11 Mar 2023 08:18:00 +0900 Subject: mm: mmap: remove newline at the end of the trace We already have newline in TP_printk so remove the redundant newline character at the end of the mmap trace. <...>-345 [006] ..... 95.589290: exit_mmap: mt_mod ... <...>-345 [006] ..... 95.589413: vm_unmapped_area: addr=... <...>-345 [006] ..... 95.589571: vm_unmapped_area: addr=... <...>-345 [006] ..... 95.589606: vm_unmapped_area: addr=... to <...>-336 [006] ..... 44.762506: exit_mmap: mt_mod ... <...>-336 [006] ..... 44.762654: vm_unmapped_area: addr=... <...>-336 [006] ..... 44.762794: vm_unmapped_area: addr=... <...>-336 [006] ..... 44.762835: vm_unmapped_area: addr=... Link: https://lkml.kernel.org/r/ZAu6qDsNPmk82UjV@minwoo-desktop FIxes: df529cabb7a25 ("mm: mmap: add trace point of vm_unmapped_area") Signed-off-by: Minwoo Im Acked-by: Steven Rostedt (Google) Reviewed-by: Mukesh Ojha Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/trace/events/mmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h index 216de5f03621..f8d61485de16 100644 --- a/include/trace/events/mmap.h +++ b/include/trace/events/mmap.h @@ -35,7 +35,7 @@ TRACE_EVENT(vm_unmapped_area, __entry->align_offset = info->align_offset; ), - TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n", + TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx", IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr, IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0, __entry->total_vm, __entry->flags, __entry->length, @@ -110,7 +110,7 @@ TRACE_EVENT(exit_mmap, __entry->mt = &mm->mm_mt; ), - TP_printk("mt_mod %p, DESTROY\n", + TP_printk("mt_mod %p, DESTROY", __entry->mt ) ); -- cgit From b20cf3f89c56b5f6a38b7f76a8128bf9f291bbd3 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Fri, 24 Mar 2023 09:06:58 +0800 Subject: platform/chrome: cros_ec_chardev: fix kernel data leak from ioctl It is possible to peep kernel page's data by providing larger `insize` in struct cros_ec_command[1] when invoking EC host commands. Fix it by using zeroed memory. [1]: https://elixir.bootlin.com/linux/v6.2/source/include/linux/platform_data/cros_ec_proto.h#L74 Fixes: eda2e30c6684 ("mfd / platform: cros_ec: Miscellaneous character device to talk with the EC") Signed-off-by: Tzung-Bi Shih Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20230324010658.1082361-1-tzungbi@kernel.org --- drivers/platform/chrome/cros_ec_chardev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c index 0de7c255254e..d6de5a294128 100644 --- a/drivers/platform/chrome/cros_ec_chardev.c +++ b/drivers/platform/chrome/cros_ec_chardev.c @@ -284,7 +284,7 @@ static long cros_ec_chardev_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) u_cmd.insize > EC_MAX_MSG_BYTES) return -EINVAL; - s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize), + s_cmd = kzalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize), GFP_KERNEL); if (!s_cmd) return -ENOMEM; -- cgit From 39b291b86b5988bf8753c3874d5c773399d09b96 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 Mar 2023 21:15:52 +0900 Subject: ksmbd: return unsupported error on smb1 mount ksmbd disconnect connection when mounting with vers=smb1. ksmbd should send smb1 negotiate response to client for correct unsupported error return. This patch add needed SMB1 macros and fill NegProt part of the response for smb1 negotiate response. Cc: stable@vger.kernel.org Reported-by: Steve French Reviewed-by: Sergey Senozhatsky Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/connection.c | 7 ++----- fs/ksmbd/smb_common.c | 23 ++++++++++++++++++++--- fs/ksmbd/smb_common.h | 30 ++++++++---------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 5d914715605f..115a67d2cf78 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -319,13 +319,10 @@ int ksmbd_conn_handler_loop(void *p) } /* - * Check if pdu size is valid (min : smb header size, - * max : 0x00FFFFFF). + * Check maximum pdu size(0x00FFFFFF). */ - if (pdu_size < __SMB2_HEADER_STRUCTURE_SIZE || - pdu_size > MAX_STREAM_PROT_LEN) { + if (pdu_size > MAX_STREAM_PROT_LEN) break; - } /* 4 for rfc1002 length field */ size = pdu_size + 4; diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index 079c9e76818d..9c1ce6d199ce 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -442,9 +442,26 @@ static int smb_handle_negotiate(struct ksmbd_work *work) { struct smb_negotiate_rsp *neg_rsp = work->response_buf; - ksmbd_debug(SMB, "Unsupported SMB protocol\n"); - neg_rsp->hdr.Status.CifsError = STATUS_INVALID_LOGON_TYPE; - return -EINVAL; + ksmbd_debug(SMB, "Unsupported SMB1 protocol\n"); + + /* + * Remove 4 byte direct TCP header, add 2 byte bcc and + * 2 byte DialectIndex. + */ + *(__be32 *)work->response_buf = + cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2 + 2); + neg_rsp->hdr.Status.CifsError = STATUS_SUCCESS; + + neg_rsp->hdr.Command = SMB_COM_NEGOTIATE; + *(__le32 *)neg_rsp->hdr.Protocol = SMB1_PROTO_NUMBER; + neg_rsp->hdr.Flags = SMBFLG_RESPONSE; + neg_rsp->hdr.Flags2 = SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS | + SMBFLG2_EXT_SEC | SMBFLG2_IS_LONG_NAME; + + neg_rsp->hdr.WordCount = 1; + neg_rsp->DialectIndex = cpu_to_le16(work->conn->dialect); + neg_rsp->ByteCount = 0; + return 0; } int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command) diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h index e663ab9ea759..d30ce4c1a151 100644 --- a/fs/ksmbd/smb_common.h +++ b/fs/ksmbd/smb_common.h @@ -158,8 +158,15 @@ #define SMB1_PROTO_NUMBER cpu_to_le32(0x424d53ff) #define SMB_COM_NEGOTIATE 0x72 - #define SMB1_CLIENT_GUID_SIZE (16) + +#define SMBFLG_RESPONSE 0x80 /* this PDU is a response from server */ + +#define SMBFLG2_IS_LONG_NAME cpu_to_le16(0x40) +#define SMBFLG2_EXT_SEC cpu_to_le16(0x800) +#define SMBFLG2_ERR_STATUS cpu_to_le16(0x4000) +#define SMBFLG2_UNICODE cpu_to_le16(0x8000) + struct smb_hdr { __be32 smb_buf_length; __u8 Protocol[4]; @@ -199,28 +206,7 @@ struct smb_negotiate_req { struct smb_negotiate_rsp { struct smb_hdr hdr; /* wct = 17 */ __le16 DialectIndex; /* 0xFFFF = no dialect acceptable */ - __u8 SecurityMode; - __le16 MaxMpxCount; - __le16 MaxNumberVcs; - __le32 MaxBufferSize; - __le32 MaxRawSize; - __le32 SessionKey; - __le32 Capabilities; /* see below */ - __le32 SystemTimeLow; - __le32 SystemTimeHigh; - __le16 ServerTimeZone; - __u8 EncryptionKeyLength; __le16 ByteCount; - union { - unsigned char EncryptionKey[8]; /* cap extended security off */ - /* followed by Domain name - if extended security is off */ - /* followed by 16 bytes of server GUID */ - /* then security blob if cap_extended_security negotiated */ - struct { - unsigned char GUID[SMB1_CLIENT_GUID_SIZE]; - unsigned char SecurityBlob[1]; - } __packed extended_response; - } __packed u; } __packed; struct filesystem_attribute_info { -- cgit From 08570b7c8db6d9185deccf5bcda773bd6f17172f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Jan 2023 23:14:00 +0100 Subject: gpu: host1x: fix uninitialized variable use The error handling for platform_get_irq() failing no longer works after a recent change, clang now points this out with a warning: drivers/gpu/host1x/dev.c:520:6: error: variable 'syncpt_irq' is uninitialized when used here [-Werror,-Wuninitialized] if (syncpt_irq < 0) ^~~~~~~~~~ Fix this by removing the variable and checking the correct error status. Fixes: 625d4ffb438c ("gpu: host1x: Rewrite syncpoint interrupt handling") Reviewed-by: Nathan Chancellor Reviewed-by: Mikko Perttunen Reported-by: "kernelci.org bot" Reviewed-by: Nick Desaulniers Reviewed-by: Jon Hunter Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230127221418.2522612-1-arnd@kernel.org --- drivers/gpu/host1x/dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 4872d183d860..aae2efeef503 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -487,7 +487,6 @@ static int host1x_get_resets(struct host1x *host) static int host1x_probe(struct platform_device *pdev) { struct host1x *host; - int syncpt_irq; int err; host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); @@ -517,8 +516,8 @@ static int host1x_probe(struct platform_device *pdev) } host->syncpt_irq = platform_get_irq(pdev, 0); - if (syncpt_irq < 0) - return syncpt_irq; + if (host->syncpt_irq < 0) + return host->syncpt_irq; mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); -- cgit From fddc6ccc487e5de07b98df8d04118d5dcb5e0407 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 17 Mar 2023 12:51:17 +0000 Subject: cifs: append path to open_enter trace event We do not dump the file path for smb3_open_enter ftrace calls, which is a severe handicap while debugging using ftrace evens. This change adds that info. Unfortunately, we're not updating the path in open params in many places; which I had to do as a part of this change. SMB2_open gets path in utf16 format, but it's easier of path is supplied as char pointer in oparms. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/cached_dir.c | 1 + fs/cifs/link.c | 2 ++ fs/cifs/smb2inode.c | 1 + fs/cifs/smb2ops.c | 11 +++++++++++ fs/cifs/smb2pdu.c | 4 ++-- fs/cifs/trace.h | 12 ++++++++---- 6 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c index 75d5e06306ea..71fabb4c09a4 100644 --- a/fs/cifs/cached_dir.c +++ b/fs/cifs/cached_dir.c @@ -184,6 +184,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_FILE), .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 7d97c10f2453..c66be4904e1f 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -360,6 +360,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, + .path = path, .desired_access = GENERIC_READ, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR), .disposition = FILE_OPEN, @@ -427,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, .cifs_sb = cifs_sb, + .path = path, .desired_access = GENERIC_WRITE, .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR), .disposition = FILE_CREATE, diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 8dd3791b5c53..163a03298430 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -107,6 +107,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, vars->oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = desired_access, .disposition = create_disposition, .create_options = cifs_create_options(cifs_sb, create_options), diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f7e18ab7ee9c..a81758225fcd 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -745,6 +745,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = "", .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -788,6 +789,7 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = "", .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -835,6 +837,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -1119,6 +1122,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = FILE_WRITE_EA, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2110,6 +2114,7 @@ smb3_notify(const unsigned int xid, struct file *pfile, tcon = cifs_sb_master_tcon(cifs_sb); oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2182,6 +2187,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2514,6 +2520,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = desired_access, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2648,6 +2655,7 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = "", .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), @@ -2942,6 +2950,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, create_options), @@ -3082,6 +3091,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = full_path, .desired_access = FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, OPEN_REPARSE_POINT), @@ -3222,6 +3232,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms = (struct cifs_open_parms) { .tcon = tcon, + .path = path, .desired_access = READ_CONTROL, .disposition = FILE_OPEN, /* diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0e53265e1462..caeb92a69b5f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2705,7 +2705,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, rqst.rq_nvec = n_iov; /* no need to inc num_remote_opens because we close it just below */ - trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, CREATE_NOT_FILE, + trace_smb3_posix_mkdir_enter(xid, tcon->tid, ses->Suid, full_path, CREATE_NOT_FILE, FILE_WRITE_ATTRIBUTES); /* resource #4: response buffer */ rc = cifs_send_recv(xid, ses, server, @@ -2973,7 +2973,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc) goto creat_exit; - trace_smb3_open_enter(xid, tcon->tid, tcon->ses->Suid, + trace_smb3_open_enter(xid, tcon->tid, tcon->ses->Suid, oparms->path, oparms->create_options, oparms->desired_access); rc = cifs_send_recv(xid, ses, server, diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index 110070ba8b04..d3053bd8ae73 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -701,13 +701,15 @@ DECLARE_EVENT_CLASS(smb3_open_enter_class, TP_PROTO(unsigned int xid, __u32 tid, __u64 sesid, + const char *full_path, int create_options, int desired_access), - TP_ARGS(xid, tid, sesid, create_options, desired_access), + TP_ARGS(xid, tid, sesid, full_path, create_options, desired_access), TP_STRUCT__entry( __field(unsigned int, xid) __field(__u32, tid) __field(__u64, sesid) + __string(path, full_path) __field(int, create_options) __field(int, desired_access) ), @@ -715,11 +717,12 @@ DECLARE_EVENT_CLASS(smb3_open_enter_class, __entry->xid = xid; __entry->tid = tid; __entry->sesid = sesid; + __assign_str(path, full_path); __entry->create_options = create_options; __entry->desired_access = desired_access; ), - TP_printk("xid=%u sid=0x%llx tid=0x%x cr_opts=0x%x des_access=0x%x", - __entry->xid, __entry->sesid, __entry->tid, + TP_printk("xid=%u sid=0x%llx tid=0x%x path=%s cr_opts=0x%x des_access=0x%x", + __entry->xid, __entry->sesid, __entry->tid, __get_str(path), __entry->create_options, __entry->desired_access) ) @@ -728,9 +731,10 @@ DEFINE_EVENT(smb3_open_enter_class, smb3_##name, \ TP_PROTO(unsigned int xid, \ __u32 tid, \ __u64 sesid, \ + const char *full_path, \ int create_options, \ int desired_access), \ - TP_ARGS(xid, tid, sesid, create_options, desired_access)) + TP_ARGS(xid, tid, sesid, full_path, create_options, desired_access)) DEFINE_SMB3_OPEN_ENTER_EVENT(open_enter); DEFINE_SMB3_OPEN_ENTER_EVENT(posix_mkdir_enter); -- cgit From bc962159e8e326af634a506508034a375bf2b858 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 20 Mar 2023 06:08:19 +0000 Subject: cifs: avoid race conditions with parallel reconnects When multiple processes/channels do reconnects in parallel we used to return success immediately negotiate/session-setup/tree-connect, causing race conditions between processes that enter the function in parallel. This caused several errors related to session not found to show up during parallel reconnects. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/connect.c | 48 ++++++++++++++++++++++++++++++++++++------------ fs/cifs/smb2pdu.c | 44 ++++++++++++++++++++++++++------------------ fs/cifs/smb2transport.c | 17 ++++++++++++++--- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f42cc7077312..c3162ef9c9e9 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -212,31 +212,42 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, cifs_chan_update_iface(ses, server); spin_lock(&ses->chan_lock); - if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) - goto next_session; + if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) { + spin_unlock(&ses->chan_lock); + continue; + } if (mark_smb_session) CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses); else cifs_chan_set_need_reconnect(ses, server); + cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", + __func__, ses->chans_need_reconnect); + /* If all channels need reconnect, then tcon needs reconnect */ - if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) - goto next_session; + if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { + spin_unlock(&ses->chan_lock); + continue; + } + spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); ses->ses_status = SES_NEED_RECON; + spin_unlock(&ses->ses_lock); list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { tcon->need_reconnect = true; + spin_lock(&tcon->tc_lock); tcon->status = TID_NEED_RECON; + spin_unlock(&tcon->tc_lock); } if (ses->tcon_ipc) { ses->tcon_ipc->need_reconnect = true; + spin_lock(&ses->tcon_ipc->tc_lock); ses->tcon_ipc->status = TID_NEED_RECON; + spin_unlock(&ses->tcon_ipc->tc_lock); } - -next_session: - spin_unlock(&ses->chan_lock); } spin_unlock(&cifs_tcp_ses_lock); } @@ -3653,11 +3664,19 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, /* only send once per connect */ spin_lock(&server->srv_lock); - if (!server->ops->need_neg(server) || + if (server->tcpStatus != CifsGood && + server->tcpStatus != CifsNew && server->tcpStatus != CifsNeedNegotiate) { + spin_unlock(&server->srv_lock); + return -EHOSTDOWN; + } + + if (!server->ops->need_neg(server) && + server->tcpStatus == CifsGood) { spin_unlock(&server->srv_lock); return 0; } + server->tcpStatus = CifsInNegotiate; spin_unlock(&server->srv_lock); @@ -3691,23 +3710,28 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, bool is_binding = false; spin_lock(&ses->ses_lock); + cifs_dbg(FYI, "%s: channel connect bitmap: 0x%lx\n", + __func__, ses->chans_need_reconnect); + if (ses->ses_status != SES_GOOD && ses->ses_status != SES_NEW && ses->ses_status != SES_NEED_RECON) { spin_unlock(&ses->ses_lock); - return 0; + return -EHOSTDOWN; } /* only send once per connect */ spin_lock(&ses->chan_lock); - if (CIFS_ALL_CHANS_GOOD(ses) || - cifs_chan_in_reconnect(ses, server)) { + if (CIFS_ALL_CHANS_GOOD(ses)) { + if (ses->ses_status == SES_NEED_RECON) + ses->ses_status = SES_GOOD; spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); return 0; } - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + cifs_chan_set_in_reconnect(ses, server); + is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); spin_unlock(&ses->chan_lock); if (!is_binding) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index caeb92a69b5f..a9fb95b7ef82 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -203,6 +203,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, } spin_unlock(&server->srv_lock); +again: rc = cifs_wait_for_server_reconnect(server, tcon->retry); if (rc) return rc; @@ -221,6 +222,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, nls_codepage = load_nls_default(); + mutex_lock(&ses->session_mutex); /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed @@ -229,6 +231,11 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); + mutex_unlock(&ses->session_mutex); + + if (tcon->retry) + goto again; + rc = -EHOSTDOWN; goto out; } @@ -238,19 +245,22 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session */ + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - if (!cifs_chan_needs_reconnect(ses, server)) { + if (!cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD) { spin_unlock(&ses->chan_lock); - + spin_unlock(&ses->ses_lock); /* this means that we only need to tree connect */ if (tcon->need_reconnect) goto skip_sess_setup; + mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { rc = cifs_setup_session(0, ses, server, nls_codepage); @@ -266,10 +276,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, mutex_unlock(&ses->session_mutex); goto out; } - mutex_unlock(&ses->session_mutex); skip_sess_setup: - mutex_lock(&ses->session_mutex); if (!tcon->need_reconnect) { mutex_unlock(&ses->session_mutex); goto out; @@ -284,7 +292,7 @@ skip_sess_setup: cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { /* If sess reconnected but tcon didn't, something strange ... */ - pr_warn_once("reconnect tcon failed rc = %d\n", rc); + cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); goto out; } @@ -1256,9 +1264,9 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) if (rc) return rc; - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); if (is_binding) { req->hdr.SessionId = cpu_to_le64(ses->Suid); @@ -1416,9 +1424,9 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) goto out_put_spnego_key; } - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); /* keep session key if binding */ if (!is_binding) { @@ -1542,9 +1550,9 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); /* keep existing ses id and flags if binding */ if (!is_binding) { @@ -1610,9 +1618,9 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base; - spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); - spin_unlock(&ses->chan_lock); + spin_lock(&ses->ses_lock); + is_binding = (ses->ses_status == SES_GOOD); + spin_unlock(&ses->ses_lock); /* keep existing ses id and flags if binding */ if (!is_binding) { diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index d827b7547ffa..790acf65a092 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -81,6 +81,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) struct cifs_ses *ses = NULL; int i; int rc = 0; + bool is_binding = false; spin_lock(&cifs_tcp_ses_lock); @@ -97,9 +98,12 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) goto out; found: + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - if (cifs_chan_needs_reconnect(ses, server) && - !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { + + is_binding = (cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD); + if (is_binding) { /* * If we are in the process of binding a new channel * to an existing session, use the master connection @@ -107,6 +111,7 @@ found: */ memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); goto out; } @@ -119,10 +124,12 @@ found: if (chan->server == server) { memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE); spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); goto out; } } spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); cifs_dbg(VFS, "%s: Could not find channel signing key for session 0x%llx\n", @@ -392,11 +399,15 @@ generate_smb3signingkey(struct cifs_ses *ses, bool is_binding = false; int chan_index = 0; + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses); + is_binding = (cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD); + chan_index = cifs_ses_get_chan_index(ses, server); /* TODO: introduce ref counting for channels when the can be freed */ spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); /* * All channels use the same encryption/decryption keys but -- cgit From fcde88af6a783d32e735dd2615528e2bf7a0f533 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 18 Mar 2023 20:58:40 -0700 Subject: xfs: pass the correct cursor to xfs_iomap_prealloc_size In xfs_buffered_write_iomap_begin, @icur is the iext cursor for the data fork and @ccur is the cursor for the cow fork. Pass in whichever cursor corresponds to allocfork, because otherwise the xfs_iext_prev_extent call can use the data fork cursor to walk off the end of the cow fork structure. Best case it returns the wrong results, worst case it does this: stack segment: 0000 [#1] PREEMPT SMP CPU: 2 PID: 3141909 Comm: fsstress Tainted: G W 6.3.0-rc2-xfsx #6.3.0-rc2 7bf5cc2e98997627cae5c930d890aba3aeec65dd Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20171121_152543-x86-ol7-builder-01.us.oracle.com-4.el7.1 04/01/2014 RIP: 0010:xfs_iext_prev+0x71/0x150 [xfs] RSP: 0018:ffffc90002233aa8 EFLAGS: 00010297 RAX: 000000000000000f RBX: 000000000000000e RCX: 000000000000000c RDX: 0000000000000002 RSI: 000000000000000e RDI: ffff8883d0019ba0 RBP: 989642409af8a7a7 R08: ffffea0000000001 R09: 0000000000000002 R10: 0000000000000000 R11: 000000000000000c R12: ffffc90002233b00 R13: ffff8883d0019ba0 R14: 989642409af8a6bf R15: 000ffffffffe0000 FS: 00007fdf8115f740(0000) GS:ffff88843fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdf8115e000 CR3: 0000000357256000 CR4: 00000000003506e0 Call Trace: xfs_iomap_prealloc_size.constprop.0.isra.0+0x1a6/0x410 [xfs 619a268fb2406d68bd34e007a816b27e70abc22c] xfs_buffered_write_iomap_begin+0xa87/0xc60 [xfs 619a268fb2406d68bd34e007a816b27e70abc22c] iomap_iter+0x132/0x2f0 iomap_file_buffered_write+0x92/0x330 xfs_file_buffered_write+0xb1/0x330 [xfs 619a268fb2406d68bd34e007a816b27e70abc22c] vfs_write+0x2eb/0x410 ksys_write+0x65/0xe0 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Found by xfs/538 in alwayscow mode, but this doesn't seem particular to that test. Fixes: 590b16516ef3 ("xfs: refactor xfs_iomap_prealloc_size") Actually-Fixes: 66ae56a53f0e ("xfs: introduce an always_cow mode") Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 69dbe7814128..285885c308bd 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1090,9 +1090,12 @@ xfs_buffered_write_iomap_begin( */ if (xfs_has_allocsize(mp)) prealloc_blocks = mp->m_allocsize_blocks; - else + else if (allocfork == XFS_DATA_FORK) prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset, count, &icur); + else + prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, + offset, count, &ccur); if (prealloc_blocks) { xfs_extlen_t align; xfs_off_t end_offset; -- cgit From e2e63b071b2da53ad6a154e34c387bb064137f74 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Mar 2023 16:33:20 -0700 Subject: xfs: clear incore AGFL_RESET state if it's not needed Prior to commit 7ac2ff8bb371, when we loaded the incore perag structure with information from the AGF header, we would set or clear the pagf_agfl_reset field based on whether or not the AGFL list was misaligned within the block. IOWs, it's an incore state bit that's supposed to cache something in the ondisk metadata. Therefore, the code still needs to support clearing the incore bit if (somehow) the AGFL were to correct itself. It turns out that xfs_repair does exactly this -- phase 4 loads the AGF to scan the rmapbt for corrupt records, which can set NEEDS_AGFL_RESET. The scan unsets AGF_INIT but doesn't unset NEEDS_AGFL_RESET. Phase 5 totally rewrites the AGFL and fixes the alignment problem, didn't clear NEEDS_AGFL_RESET historically, and reloads the perag state to fix the freelist. This results in the AGFL being reset based on stale data, which then causes the new AGFL blocks to be leaked. A subsequent xfs_repair -n then complains about the leaks. One could argue that phase 5 ought to clear this bit directly when it reloads the perag AGF data after rewriting the AGFL, but libxfs used to handle this for us, so it should go back to doing that. Found by fuzzing flfirst = ones in xfs/352. Fixes: 7ac2ff8bb371 ("xfs: perags need atomic operational state") Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 55ae08a6144c..badc213384a4 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3045,6 +3045,8 @@ xfs_alloc_read_agf( pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); if (xfs_agfl_needs_reset(pag->pag_mount, agf)) set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); + else + clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); /* * Update the in-core allocbt counter. Filter out the rmapbt -- cgit From 2d0ab14634a26e54f8d6d231b47b7ef233e84599 Mon Sep 17 00:00:00 2001 From: Aymeric Wibo Date: Sun, 19 Mar 2023 03:12:05 +0100 Subject: ACPI: resource: Add Medion S17413 to IRQ override quirk Add DMI info of the Medion S17413 (board M1xA) to the IRQ override quirk table. This fixes the keyboard not working on these laptops. Link: https://bugzilla.kernel.org/show_bug.cgi?id=213031 Signed-off-by: Aymeric Wibo [ rjw: Fixed up white space ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 7c9125df5a65..7b4801ce62d6 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -400,6 +400,13 @@ static const struct dmi_system_id medion_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "M17T"), }, }, + { + .ident = "MEDION S17413", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), + DMI_MATCH(DMI_BOARD_NAME, "M1xA"), + }, + }, { } }; -- cgit From c24bb1a87dc3f2d77d410eaac2c6a295961bf50e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 24 Mar 2023 16:05:19 -0300 Subject: cifs: fix missing unload_nls() in smb2_reconnect() Make sure to unload_nls() @nls_codepage if we no longer need it. Fixes: bc962159e8e3 ("cifs: avoid race conditions with parallel reconnects") Signed-off-by: Paulo Alcantara (SUSE) Cc: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a9fb95b7ef82..20af1af34fa5 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -144,7 +144,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, struct TCP_Server_Info *server) { int rc = 0; - struct nls_table *nls_codepage; + struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; /* @@ -220,8 +220,6 @@ again: tcon->ses->chans_need_reconnect, tcon->need_reconnect); - nls_codepage = load_nls_default(); - mutex_lock(&ses->session_mutex); /* * Recheck after acquire mutex. If another thread is negotiating @@ -241,6 +239,8 @@ again: } spin_unlock(&server->srv_lock); + nls_codepage = load_nls_default(); + /* * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session -- cgit From 7e0e76d99079be13c9961dde7c93b2d1ee665af4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Mar 2023 15:10:26 -0500 Subject: smb3: lower default deferred close timeout to address perf regression Performance tests with large number of threads noted that the change of the default closetimeo (deferred close timeout between when close is done by application and when client has to send the close to the server), to 5 seconds from 1 second, significantly degraded perf in some cases like this (in the filebench example reported, the stats show close requests on the wire taking twice as long, and 50% regression in filebench perf). This is stil configurable via mount parm closetimeo, but to be safe, decrease default back to its previous value of 1 second. Reported-by: Yin Fengwei Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/997614df-10d4-af53-9571-edec36b0e2f3@intel.com/ Fixes: 5efdd9122eff ("smb3: allow deferred close timeout to be configurable") Cc: stable@vger.kernel.org # 6.0+ Tested-by: Yin Fengwei Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/fs_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index 1b8d4e27f831..3de00e7127ec 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -286,5 +286,5 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); * max deferred close timeout (jiffies) - 2^30 */ #define SMB3_MAX_DCLOSETIMEO (1 << 30) -#define SMB3_DEF_DCLOSETIMEO (5 * HZ) /* Can increase later, other clients use larger */ +#define SMB3_DEF_DCLOSETIMEO (1 * HZ) /* even 1 sec enough to help eg open/write/close/open/read */ #endif -- cgit From be4fde79812f02914e350bde0bc4cfeae8429378 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 24 Mar 2023 13:56:33 -0300 Subject: cifs: fix dentry lookups in directory handle cache Get rid of any prefix paths in @path before lookup_positive_unlocked() as it will call ->lookup() which already adds those prefix paths through build_path_from_dentry(). This has caused a performance regression when mounting shares with a prefix path where readdir(2) would end up retrying several times to open bad directory names that contained duplicate prefix paths. Fix this by skipping any prefix paths in @path before calling lookup_positive_unlocked(). Fixes: e4029e072673 ("cifs: find and use the dentry for cached non-root directories also") Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cached_dir.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c index 71fabb4c09a4..bfc964b36c72 100644 --- a/fs/cifs/cached_dir.c +++ b/fs/cifs/cached_dir.c @@ -99,6 +99,23 @@ path_to_dentry(struct cifs_sb_info *cifs_sb, const char *path) return dentry; } +static const char *path_no_prefix(struct cifs_sb_info *cifs_sb, + const char *path) +{ + size_t len = 0; + + if (!*path) + return path; + + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + cifs_sb->prepath) { + len = strlen(cifs_sb->prepath) + 1; + if (unlikely(len > strlen(path))) + return ERR_PTR(-EINVAL); + } + return path + len; +} + /* * Open the and cache a directory handle. * If error then *cfid is not initialized. @@ -125,6 +142,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, struct dentry *dentry = NULL; struct cached_fid *cfid; struct cached_fids *cfids; + const char *npath; if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache || is_smb1_server(tcon->ses->server)) @@ -160,6 +178,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return 0; } + /* + * Skip any prefix paths in @path as lookup_positive_unlocked() ends up + * calling ->lookup() which already adds those through + * build_path_from_dentry(). Also, do it earlier as we might reconnect + * below when trying to send compounded request and then potentially + * having a different prefix path (e.g. after DFS failover). + */ + npath = path_no_prefix(cifs_sb, path); + if (IS_ERR(npath)) { + rc = PTR_ERR(npath); + kfree(utf16_path); + return rc; + } + /* * We do not hold the lock for the open because in case * SMB2_open needs to reconnect. @@ -252,10 +284,10 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, (char *)&cfid->file_all_info)) cfid->file_all_info_is_valid = true; - if (!path[0]) + if (!npath[0]) dentry = dget(cifs_sb->root); else { - dentry = path_to_dentry(cifs_sb, path); + dentry = path_to_dentry(cifs_sb, npath); if (IS_ERR(dentry)) { rc = -ENOENT; goto oshr_free; -- cgit From 491eafce1a51c457701351a4bf40733799745314 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 23 Mar 2023 16:20:02 -0500 Subject: smb3: fix unusable share after force unmount failure If user does forced unmount ("umount -f") while files are still open on the share (as was seen in a Kubernetes example running on SMB3.1.1 mount) then we were marking the share as "TID_EXITING" in umount_begin() which caused all subsequent operations (except write) to fail ... but unfortunately when umount_begin() is called we do not know yet that there are open files or active references on the share that would prevent unmount from succeeding. Kubernetes had example when they were doing umount -f when files were open which caused the share to become unusable until the files were closed (and the umount retried). Fix this so that TID_EXITING is not set until we are about to send the tree disconnect (not at the beginning of forced umounts in umount_begin) so that if "umount -f" fails (due to open files or references) the mount is still usable. Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 9 ++++++--- fs/cifs/cifssmb.c | 6 ++---- fs/cifs/connect.c | 1 + fs/cifs/smb2pdu.c | 8 ++------ 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index cbcf210d56e4..ac9034fce409 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -731,13 +731,16 @@ static void cifs_umount_begin(struct super_block *sb) spin_lock(&tcon->tc_lock); if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { /* we have other mounts to same share or we have - already tried to force umount this and woken up + already tried to umount this and woken up all waiting network requests, nothing to do */ spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); return; - } else if (tcon->tc_count == 1) - tcon->status = TID_EXITING; + } + /* + * can not set tcon->status to TID_EXITING yet since we don't know if umount -f will + * fail later (e.g. due to open files). TID_EXITING will be set just before tdis req sent + */ spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a43c78396dd8..38a697eca305 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,13 +86,11 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) /* * only tree disconnect, open, and write, (and ulogoff which does not - * have tcon) are allowed as we start force umount + * have tcon) are allowed as we start umount */ spin_lock(&tcon->tc_lock); if (tcon->status == TID_EXITING) { - if (smb_command != SMB_COM_WRITE_ANDX && - smb_command != SMB_COM_OPEN_ANDX && - smb_command != SMB_COM_TREE_DISCONNECT) { + if (smb_command != SMB_COM_TREE_DISCONNECT) { spin_unlock(&tcon->tc_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb_command); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c3162ef9c9e9..1cbb90587995 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2324,6 +2324,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) WARN_ON(tcon->tc_count < 0); list_del_init(&tcon->tcon_list); + tcon->status = TID_EXITING; spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 20af1af34fa5..6bd2aa6af18f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -165,13 +165,9 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, spin_lock(&tcon->tc_lock); if (tcon->status == TID_EXITING) { /* - * only tree disconnect, open, and write, - * (and ulogoff which does not have tcon) - * are allowed as we start force umount. + * only tree disconnect allowed when disconnecting ... */ - if ((smb2_command != SMB2_WRITE) && - (smb2_command != SMB2_CREATE) && - (smb2_command != SMB2_TREE_DISCONNECT)) { + if (smb2_command != SMB2_TREE_DISCONNECT) { spin_unlock(&tcon->tc_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb2_command); -- cgit From 4dfb02d5cae80289384c4d3c6ddfbd92d30aced9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 24 Mar 2023 13:14:48 -0700 Subject: xfs: fix mismerged tracepoints At some point in between sending this patch to the list and merging it into for-next, the tracepoints got all mixed up because I've over-reliant on automated tools not sucking. The end result is that the tracepoints are all wrong, so fix them. Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index badc213384a4..203f16c48c19 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3413,7 +3413,7 @@ xfs_alloc_vextent_start_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; - trace_xfs_alloc_vextent_first_ag(args); + trace_xfs_alloc_vextent_start_ag(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { @@ -3466,7 +3466,7 @@ xfs_alloc_vextent_first_ag( args->agno = NULLAGNUMBER; args->agbno = NULLAGBLOCK; - trace_xfs_alloc_vextent_start_ag(args); + trace_xfs_alloc_vextent_first_ag(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { @@ -3500,7 +3500,7 @@ xfs_alloc_vextent_exact_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); - trace_xfs_alloc_vextent_near_bno(args); + trace_xfs_alloc_vextent_exact_bno(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { @@ -3538,7 +3538,7 @@ xfs_alloc_vextent_near_bno( args->agno = XFS_FSB_TO_AGNO(mp, target); args->agbno = XFS_FSB_TO_AGBNO(mp, target); - trace_xfs_alloc_vextent_exact_bno(args); + trace_xfs_alloc_vextent_near_bno(args); error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); if (error) { -- cgit From 197b6b60ae7bc51dd0814953c562833143b292aa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Mar 2023 14:40:20 -0700 Subject: Linux 6.3-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a2c310df2145..da2586d4c728 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit From 0acc7239c20a8401b8968c2adace8f7c9b0295ae Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:44 +0000 Subject: KVM: arm64: Avoid vcpu->mutex v. kvm->lock inversion in CPU_ON KVM/arm64 had the lock ordering backwards on vcpu->mutex and kvm->lock from the very beginning. One such example is the way vCPU resets are handled: the kvm->lock is acquired while handling a guest CPU_ON PSCI call. Add a dedicated lock to serialize writes to kvm_vcpu_arch::{mp_state, reset_state}. Promote all accessors of mp_state to {READ,WRITE}_ONCE() as readers do not acquire the mp_state_lock. While at it, plug yet another race by taking the mp_state_lock in the KVM_SET_MP_STATE ioctl handler. As changes to MP state are now guarded with a dedicated lock, drop the kvm->lock acquisition from the PSCI CPU_ON path. Similarly, move the reader of reset_state outside of the kvm->lock and instead protect it with the mp_state_lock. Note that writes to reset_state::reset have been demoted to regular stores as both readers and writers acquire the mp_state_lock. While the kvm->lock inversion still exists in kvm_reset_vcpu(), at least now PSCI CPU_ON no longer depends on it for serializing vCPU reset. Cc: stable@vger.kernel.org Tested-by: Jeremy Linton Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-2-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 31 ++++++++++++++++++++++--------- arch/arm64/kvm/psci.c | 28 ++++++++++++++++------------ arch/arm64/kvm/reset.c | 9 +++++---- 4 files changed, 44 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bcd774d74f34..917586237a4d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -522,6 +522,7 @@ struct kvm_vcpu_arch { /* vcpu power state */ struct kvm_mp_state mp_state; + spinlock_t mp_state_lock; /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..647798da8c41 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -326,6 +326,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int err; + spin_lock_init(&vcpu->arch.mp_state_lock); + /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); @@ -443,34 +445,41 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; } -void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) +static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_arm_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED; + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; } static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) { - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED); kvm_make_request(KVM_REQ_SUSPEND, vcpu); kvm_vcpu_kick(vcpu); } static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) { - return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED; + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED; } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - *mp_state = vcpu->arch.mp_state; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -480,12 +489,14 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.mp_state = *mp_state; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_arm_vcpu_power_off(vcpu); + __kvm_arm_vcpu_power_off(vcpu); break; case KVM_MP_STATE_SUSPENDED: kvm_arm_vcpu_suspend(vcpu); @@ -494,6 +505,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } @@ -1213,7 +1226,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) kvm_arm_vcpu_power_off(vcpu); else - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); return 0; } diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 7fbc4c1b9df0..5767e6baa61a 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct vcpu_reset_state *reset_state; struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; + int ret = PSCI_RET_SUCCESS; unsigned long cpu_id; cpu_id = smccc_get_arg1(source_vcpu); @@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; + + spin_lock(&vcpu->arch.mp_state_lock); if (!kvm_arm_vcpu_stopped(vcpu)) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) - return PSCI_RET_ALREADY_ON; + ret = PSCI_RET_ALREADY_ON; else - return PSCI_RET_INVALID_PARAMS; + ret = PSCI_RET_INVALID_PARAMS; + + goto out_unlock; } reset_state = &vcpu->arch.reset_state; @@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ reset_state->r0 = smccc_get_arg3(source_vcpu); - WRITE_ONCE(reset_state->reset, true); + reset_state->reset = true; kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); /* @@ -108,7 +113,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; kvm_vcpu_wake_up(vcpu); - return PSCI_RET_SUCCESS; +out_unlock: + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) @@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) * after this call is handled and before the VCPUs have been * re-initialized. */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&tmp->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&tmp->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); @@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; int ret = 1; @@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) kvm_psci_narrow_to_32bit(vcpu); fallthrough; case PSCI_0_2_FN64_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: kvm_psci_narrow_to_32bit(vcpu); @@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; u32 psci_fn = smccc_get_function(vcpu); unsigned long val; @@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: - mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); - mutex_unlock(&kvm->lock); break; default: val = PSCI_RET_NOT_SUPPORTED; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 49a3257dec46..9e023546bde0 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -264,15 +264,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->kvm->lock); ret = kvm_set_vm_width(vcpu); - if (!ret) { - reset_state = vcpu->arch.reset_state; - WRITE_ONCE(vcpu->arch.reset_state.reset, false); - } mutex_unlock(&vcpu->kvm->lock); if (ret) return ret; + spin_lock(&vcpu->arch.mp_state_lock); + reset_state = vcpu->arch.reset_state; + vcpu->arch.reset_state.reset = false; + spin_unlock(&vcpu->arch.mp_state_lock); + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); -- cgit From c43120afb5c66a3465c7468f5cf9806a26484cde Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:45 +0000 Subject: KVM: arm64: Avoid lock inversion when setting the VM register width kvm->lock must be taken outside of the vcpu->mutex. Of course, the locking documentation for KVM makes this abundantly clear. Nonetheless, the locking order in KVM/arm64 has been wrong for quite a while; we acquire the kvm->lock while holding the vcpu->mutex all over the shop. All was seemingly fine until commit 42a90008f890 ("KVM: Ensure lockdep knows about kvm->lock vs. vcpu->mutex ordering rule") caught us with our pants down, leading to lockdep barfing: ====================================================== WARNING: possible circular locking dependency detected 6.2.0-rc7+ #19 Not tainted ------------------------------------------------------ qemu-system-aar/859 is trying to acquire lock: ffff5aa69269eba0 (&host_kvm->lock){+.+.}-{3:3}, at: kvm_reset_vcpu+0x34/0x274 but task is already holding lock: ffff5aa68768c0b8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x8c/0xba0 which lock already depends on the new lock. Add a dedicated lock to serialize writes to VM-scoped configuration from the context of a vCPU. Protect the register width flags with the new lock, thus avoiding the need to grab the kvm->lock while holding vcpu->mutex in kvm_reset_vcpu(). Cc: stable@vger.kernel.org Reported-by: Jeremy Linton Link: https://lore.kernel.org/kvmarm/f6452cdd-65ff-34b8-bab0-5c06416da5f6@arm.com/ Tested-by: Jeremy Linton Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/arm.c | 18 ++++++++++++++++++ arch/arm64/kvm/reset.c | 6 +++--- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 917586237a4d..cd1ef8716719 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -199,6 +199,9 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + /* Protects VM-scoped configuration data */ + struct mutex config_lock; + /* * If we encounter a data abort without valid instruction syndrome * information, report this to user space. User space can (and diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 647798da8c41..1620ec3d95ef 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -128,6 +128,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; + mutex_init(&kvm->arch.config_lock); + +#ifdef CONFIG_LOCKDEP + /* Clue in lockdep that the config_lock must be taken inside kvm->lock */ + mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); + mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->lock); +#endif + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) return ret; @@ -328,6 +338,14 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.mp_state_lock); +#ifdef CONFIG_LOCKDEP + /* Inform lockdep that the config_lock is acquired after vcpu->mutex */ + mutex_lock(&vcpu->mutex); + mutex_lock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->mutex); +#endif + /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 9e023546bde0..b5dee8e57e77 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -205,7 +205,7 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu) is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT); - lockdep_assert_held(&kvm->lock); + lockdep_assert_held(&kvm->arch.config_lock); if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) { /* @@ -262,9 +262,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) bool loaded; u32 pstate; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); ret = kvm_set_vm_width(vcpu); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); if (ret) return ret; -- cgit From 4bba7f7def6f278266dadf845da472cfbfed784e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:46 +0000 Subject: KVM: arm64: Use config_lock to protect data ordered against KVM_RUN There are various bits of VM-scoped data that can only be configured before the first call to KVM_RUN, such as the hypercall bitmaps and the PMU. As these fields are protected by the kvm->lock and accessed while holding vcpu->mutex, this is yet another example of lock inversion. Change out the kvm->lock for kvm->arch.config_lock in all of these instances. Opportunistically simplify the locking mechanics of the PMU configuration by holding the config_lock for the entirety of kvm_arm_pmu_v3_set_attr(). Note that this also addresses a couple of bugs. There is an unguarded read of the PMU version in KVM_ARM_VCPU_PMU_V3_FILTER which could race with KVM_ARM_VCPU_PMU_V3_SET_PMU. Additionally, until now writes to the per-vCPU vPMU irq were not serialized VM-wide, meaning concurrent calls to KVM_ARM_VCPU_PMU_V3_IRQ could lead to a false positive in pmu_irq_is_valid(). Cc: stable@vger.kernel.org Tested-by: Jeremy Linton Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-4-oliver.upton@linux.dev --- arch/arm64/kvm/arm.c | 4 ++-- arch/arm64/kvm/guest.c | 2 ++ arch/arm64/kvm/hypercalls.c | 4 ++-- arch/arm64/kvm/pmu-emul.c | 23 ++++++----------------- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1620ec3d95ef..fd8d355aca15 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -624,9 +624,9 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (kvm_vm_is_protected(kvm)) kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07444fa22888..481c79cf22cd 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -957,7 +957,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, switch (attr->group) { case KVM_ARM_VCPU_PMU_V3_CTRL: + mutex_lock(&vcpu->kvm->arch.config_lock); ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_set_attr(vcpu, attr); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5da884e11337..fbdbf4257f76 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -377,7 +377,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) if (val & ~fw_reg_features) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) && val != *fw_reg_bmap) { @@ -387,7 +387,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) WRITE_ONCE(*fw_reg_bmap, val); out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 24908400e190..240168416838 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -874,7 +874,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) struct arm_pmu *arm_pmu; int ret = -ENXIO; - mutex_lock(&kvm->lock); + lockdep_assert_held(&kvm->arch.config_lock); mutex_lock(&arm_pmus_lock); list_for_each_entry(entry, &arm_pmus, entry) { @@ -894,7 +894,6 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) } mutex_unlock(&arm_pmus_lock); - mutex_unlock(&kvm->lock); return ret; } @@ -902,22 +901,20 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { struct kvm *kvm = vcpu->kvm; + lockdep_assert_held(&kvm->arch.config_lock); + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) return -EBUSY; - mutex_lock(&kvm->lock); if (!kvm->arch.arm_pmu) { /* No PMU set, get the default one */ kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); - if (!kvm->arch.arm_pmu) { - mutex_unlock(&kvm->lock); + if (!kvm->arch.arm_pmu) return -ENODEV; - } } - mutex_unlock(&kvm->lock); switch (attr->attr) { case KVM_ARM_VCPU_PMU_V3_IRQ: { @@ -961,19 +958,13 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) filter.action != KVM_PMU_EVENT_DENY)) return -EINVAL; - mutex_lock(&kvm->lock); - - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { - mutex_unlock(&kvm->lock); + if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) return -EBUSY; - } if (!kvm->arch.pmu_filter) { kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); - if (!kvm->arch.pmu_filter) { - mutex_unlock(&kvm->lock); + if (!kvm->arch.pmu_filter) return -ENOMEM; - } /* * The default depends on the first applied filter. @@ -992,8 +983,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) else bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents); - mutex_unlock(&kvm->lock); - return 0; } case KVM_ARM_VCPU_PMU_V3_SET_PMU: { -- cgit From f00327731131d1b5aa6a1aa9f50bcf8d620ace4c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 27 Mar 2023 16:47:47 +0000 Subject: KVM: arm64: Use config_lock to protect vgic state Almost all of the vgic state is VM-scoped but accessed from the context of a vCPU. These accesses were serialized on the kvm->lock which cannot be nested within a vcpu->mutex critical section. Move over the vgic state to using the config_lock. Tweak the lock ordering where necessary to ensure that the config_lock is acquired after the vcpu->mutex. Acquire the config_lock in kvm_vgic_create() to avoid a race between the converted flows and GIC creation. Where necessary, continue to acquire kvm->lock to avoid a race with vCPU creation (i.e. flows that use lock_all_vcpus()). Finally, promote the locking expectations in comments to lockdep assertions and update the locking documentation for the config_lock as well as vcpu->mutex. Cc: stable@vger.kernel.org Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230327164747.2466958-5-oliver.upton@linux.dev --- arch/arm64/kvm/vgic/vgic-debug.c | 8 +++--- arch/arm64/kvm/vgic/vgic-init.c | 36 +++++++++++++++++---------- arch/arm64/kvm/vgic/vgic-its.c | 18 +++++++++----- arch/arm64/kvm/vgic/vgic-kvm-device.c | 47 +++++++++++++++++++++-------------- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 4 +-- arch/arm64/kvm/vgic/vgic-mmio.c | 12 ++++----- arch/arm64/kvm/vgic/vgic-v4.c | 11 ++++---- arch/arm64/kvm/vgic/vgic.c | 12 +++++---- 8 files changed, 88 insertions(+), 60 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 78cde687383c..07aa0437125a 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -85,7 +85,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) struct kvm *kvm = s->private; struct vgic_state_iter *iter; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; if (iter) { iter = ERR_PTR(-EBUSY); @@ -104,7 +104,7 @@ static void *vgic_debug_start(struct seq_file *s, loff_t *pos) if (end_of_vgic(iter)) iter = NULL; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return iter; } @@ -132,12 +132,12 @@ static void vgic_debug_stop(struct seq_file *s, void *v) if (IS_ERR(v)) return; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); iter = kvm->arch.vgic.iter; kfree(iter->lpi_array); kfree(iter); kvm->arch.vgic.iter = NULL; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index cd134db41a57..9d42c7cb2b58 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -74,9 +74,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) unsigned long i; int ret; - if (irqchip_in_kernel(kvm)) - return -EEXIST; - /* * This function is also called by the KVM_CREATE_IRQCHIP handler, * which had no chance yet to check the availability of the GICv2 @@ -87,10 +84,20 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) !kvm_vgic_global_state.can_emulate_gicv2) return -ENODEV; + /* Must be held to avoid race with vCPU creation */ + lockdep_assert_held(&kvm->lock); + ret = -EBUSY; if (!lock_all_vcpus(kvm)) return ret; + mutex_lock(&kvm->arch.config_lock); + + if (irqchip_in_kernel(kvm)) { + ret = -EEXIST; + goto out_unlock; + } + kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu_has_run_once(vcpu)) goto out_unlock; @@ -118,6 +125,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); out_unlock: + mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); return ret; } @@ -227,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * KVM io device for the redistributor that belongs to this VCPU. */ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); ret = vgic_register_redist_iodev(vcpu); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } return ret; } @@ -250,7 +258,6 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) * The function is generally called when nr_spis has been explicitly set * by the guest through the KVM DEVICE API. If not nr_spis is set to 256. * vgic_initialized() returns true when this function has succeeded. - * Must be called with kvm->lock held! */ int vgic_init(struct kvm *kvm) { @@ -259,6 +266,8 @@ int vgic_init(struct kvm *kvm) int ret = 0, i; unsigned long idx; + lockdep_assert_held(&kvm->arch.config_lock); + if (vgic_initialized(kvm)) return 0; @@ -373,12 +382,13 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; } -/* To be called with kvm->lock held */ static void __kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) @@ -389,9 +399,9 @@ static void __kvm_vgic_destroy(struct kvm *kvm) void kvm_vgic_destroy(struct kvm *kvm) { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); __kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } /** @@ -414,9 +424,9 @@ int vgic_lazy_init(struct kvm *kvm) if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) return -EBUSY; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); } return ret; @@ -441,7 +451,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (likely(vgic_ready(kvm))) return 0; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); if (vgic_ready(kvm)) goto out; @@ -459,7 +469,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) dist->ready = true; out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 2642e9ce2819..7713cd06104e 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2045,6 +2045,13 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { ret = -ENXIO; goto out; @@ -2058,11 +2065,6 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, goto out; } - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - addr = its->vgic_its_base + offset; len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; @@ -2076,8 +2078,9 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev, } else { *reg = region->its_read(dev->kvm, its, addr, len); } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return ret; } @@ -2757,6 +2760,8 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return -EBUSY; } + mutex_lock(&kvm->arch.config_lock); + switch (attr) { case KVM_DEV_ARM_ITS_CTRL_RESET: vgic_its_reset(kvm, its); @@ -2769,6 +2774,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) break; } + mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index edeac2380591..07e727023deb 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -46,7 +46,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev struct vgic_dist *vgic = &kvm->arch.vgic; int r; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -68,7 +68,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev r = -ENODEV; } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return r; } @@ -102,7 +102,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri if (get_user(addr, uaddr)) return -EFAULT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); switch (attr->attr) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -191,7 +191,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri } out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); if (!r && !write) r = put_user(addr, uaddr); @@ -227,7 +227,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, (val & 31)) return -EINVAL; - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) ret = -EBUSY; @@ -235,16 +235,16 @@ static int vgic_set_common_attr(struct kvm_device *dev, dev->kvm->arch.vgic.nr_spis = val - VGIC_NR_PRIVATE_IRQS; - mutex_unlock(&dev->kvm->lock); + mutex_unlock(&dev->kvm->arch.config_lock); return ret; } case KVM_DEV_ARM_VGIC_GRP_CTRL: { switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: - mutex_lock(&dev->kvm->lock); + mutex_lock(&dev->kvm->arch.config_lock); r = vgic_init(dev->kvm); - mutex_unlock(&dev->kvm->lock); + mutex_unlock(&dev->kvm->arch.config_lock); return r; case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: /* @@ -260,7 +260,10 @@ static int vgic_set_common_attr(struct kvm_device *dev, mutex_unlock(&dev->kvm->lock); return -EBUSY; } + + mutex_lock(&dev->kvm->arch.config_lock); r = vgic_v3_save_pending_tables(dev->kvm); + mutex_unlock(&dev->kvm->arch.config_lock); unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); return r; @@ -411,15 +414,17 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + + mutex_lock(&dev->kvm->arch.config_lock); + ret = vgic_init(dev->kvm); if (ret) goto out; - if (!lock_all_vcpus(dev->kvm)) { - ret = -EBUSY; - goto out; - } - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val); @@ -432,8 +437,9 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && !is_write) @@ -569,12 +575,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, mutex_lock(&dev->kvm->lock); - if (unlikely(!vgic_initialized(dev->kvm))) { - ret = -EBUSY; - goto out; + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; } - if (!lock_all_vcpus(dev->kvm)) { + mutex_lock(&dev->kvm->arch.config_lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { ret = -EBUSY; goto out; } @@ -609,8 +617,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, break; } - unlock_all_vcpus(dev->kvm); out: + mutex_unlock(&dev->kvm->arch.config_lock); + unlock_all_vcpus(dev->kvm); mutex_unlock(&dev->kvm->lock); if (!ret && uaccess && !is_write) { diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 91201f743033..472b18ac92a2 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -111,7 +111,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, case GICD_CTLR: { bool was_enabled, is_hwsgi; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); was_enabled = dist->enabled; is_hwsgi = dist->nassgireq; @@ -139,7 +139,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, else if (!was_enabled && dist->enabled) vgic_kick_vcpus(vcpu->kvm); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); break; } case GICD_TYPER: diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index e67b3b2c8044..1939c94e0b24 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -530,13 +530,13 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 val; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); val = __vgic_mmio_read_active(vcpu, addr, len); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); return val; } @@ -625,13 +625,13 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_cactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, @@ -662,13 +662,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.config_lock); vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_sactive(vcpu, addr, len, val); vgic_access_active_finish(vcpu, intid); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.config_lock); } int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index a413718be92b..3bb003478060 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -232,9 +232,8 @@ int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq) * @kvm: Pointer to the VM being initialized * * We may be called each time a vITS is created, or when the - * vgic is initialized. This relies on kvm->lock to be - * held. In both cases, the number of vcpus should now be - * fixed. + * vgic is initialized. In both cases, the number of vcpus + * should now be fixed. */ int vgic_v4_init(struct kvm *kvm) { @@ -243,6 +242,8 @@ int vgic_v4_init(struct kvm *kvm) int nr_vcpus, ret; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!kvm_vgic_global_state.has_gicv4) return 0; /* Nothing to see here... move along. */ @@ -309,14 +310,14 @@ int vgic_v4_init(struct kvm *kvm) /** * vgic_v4_teardown - Free the GICv4 data structures * @kvm: Pointer to the VM being destroyed - * - * Relies on kvm->lock to be held. */ void vgic_v4_teardown(struct kvm *kvm) { struct its_vm *its_vm = &kvm->arch.vgic.its_vm; int i; + lockdep_assert_held(&kvm->arch.config_lock); + if (!its_vm->vpes) return; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d97e6080b421..0a005da83ae6 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -24,11 +24,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { /* * Locking order is always: * kvm->lock (mutex) - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock must be taken with IRQs disabled - * kvm->lpi_list_lock must be taken with IRQs disabled - * vgic_irq->irq_lock must be taken with IRQs disabled + * vcpu->mutex (mutex) + * kvm->arch.config_lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled * * As the ap_list_lock might be taken from the timer interrupt handler, * we have to disable IRQs before taking this lock and everything lower -- cgit From 0d0ae656b71155ccc0be9388beef77a1f7e7558e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:41 +0100 Subject: KVM: arm64: timers: Use a per-vcpu, per-timer accumulator for fractional ns Instead of accumulating the fractional ns value generated every time we compute a ns delta in a global variable, use a per-vcpu, per-timer variable. This keeps the fractional ns local to the timer instead of contributing to any odd, unrelated timer. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-2-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 2 +- include/kvm/arm_arch_timer.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e1af4301b913..9515c645f03d 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -212,7 +212,7 @@ static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx, ns = cyclecounter_cyc2ns(timecounter->cc, val - now, timecounter->mask, - &timecounter->frac); + &timer_ctx->ns_frac); return ns; } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c52a6e6839da..70d47c4adc6a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -44,6 +44,7 @@ struct arch_timer_context { /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; + u64 ns_frac; /* Offset for this counter/timer */ struct arch_timer_offset offset; -- cgit From eaacaa4f26ade43dfdfdd3707c95d734b5ffc530 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:42 +0100 Subject: arm64: Add CNTPOFF_EL2 register definition Add the definition for CNTPOFF_EL2 in the description file. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-3-maz@kernel.org --- arch/arm64/tools/sysreg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index dd5a9c7e310f..7063f1aacc54 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1952,6 +1952,10 @@ Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg +Sysreg CNTPOFF_EL2 3 4 14 0 6 +Field 63:0 PhysicalOffset +EndSysreg + Sysreg CPACR_EL12 3 5 1 0 2 Fields CPACR_ELx EndSysreg -- cgit From 326349943ed181890b8b2af2755bd0eac93bd66d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:43 +0100 Subject: arm64: Add HAS_ECV_CNTPOFF capability Add the probing code for the FEAT_ECV variant that implements CNTPOFF_EL2. Why it is optional is a mystery, but let's try and detect it. Reviewed-by: Reiji Watanabe Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-4-maz@kernel.org --- arch/arm64/kernel/cpufeature.c | 11 +++++++++++ arch/arm64/tools/cpucaps | 1 + 2 files changed, 12 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2e3e55139777..c331c49a7d19 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2223,6 +2223,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 1, }, + { + .desc = "Enhanced Counter Virtualization (CNTPOFF)", + .capability = ARM64_HAS_ECV_CNTPOFF, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR0_EL1, + .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, + .field_width = 4, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF, + }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 37b1340e9646..40ba95472594 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -23,6 +23,7 @@ HAS_DCPOP HAS_DIT HAS_E0PD HAS_ECV +HAS_ECV_CNTPOFF HAS_EPAN HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH_QARMA3 -- cgit From 2b4825a8694018901e641ccc2eafd0fff58d1415 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:44 +0100 Subject: KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer With ECV and CNTPOFF_EL2, it is very easy to offer an offset for the physical timer. So let's do just that. Nothing can set the offset yet, so this should have no effect whatsoever (famous last words...). Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-5-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 18 +++++++++++++++++- arch/arm64/kvm/hypercalls.c | 2 +- include/clocksource/arm_arch_timer.h | 1 + include/kvm/arm_arch_timer.h | 2 ++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 9515c645f03d..3118ea0a1b41 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -52,6 +52,11 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +static bool has_cntpoff(void) +{ + return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); +} + u32 timer_get_ctl(struct arch_timer_context *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; @@ -84,7 +89,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static u64 timer_get_offset(struct arch_timer_context *ctxt) { - if (ctxt->offset.vm_offset) + if (ctxt && ctxt->offset.vm_offset) return *ctxt->offset.vm_offset; return 0; @@ -432,6 +437,12 @@ static void set_cntvoff(u64 cntvoff) kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); } +static void set_cntpoff(u64 cntpoff) +{ + if (has_cntpoff()) + write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2); +} + static void timer_save_state(struct arch_timer_context *ctx) { struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); @@ -480,6 +491,7 @@ static void timer_save_state(struct arch_timer_context *ctx) write_sysreg_el0(0, SYS_CNTP_CTL); isb(); + set_cntpoff(0); break; case NR_KVM_TIMERS: BUG(); @@ -550,6 +562,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: + set_cntpoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); @@ -767,6 +780,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) vtimer->vcpu = vcpu; vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset; ptimer->vcpu = vcpu; + ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset; /* Synchronize cntvoff across all vtimers of a VM. */ timer_set_offset(vtimer, kvm_phys_timer_read()); @@ -1297,6 +1311,8 @@ void kvm_timer_init_vhe(void) val = read_sysreg(cnthctl_el2); val |= (CNTHCTL_EL1PCEN << cnthctl_shift); val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); + if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)) + val |= CNTHCTL_ECV; write_sysreg(val, cnthctl_el2); } diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5da884e11337..39a4707e081d 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset; break; case KVM_PTP_PHYS_COUNTER: - cycles = systime_snapshot.cycles; + cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset; break; default: return; diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 057c8964aefb..cbbc9a6dc571 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -21,6 +21,7 @@ #define CNTHCTL_EVNTEN (1 << 2) #define CNTHCTL_EVNTDIR (1 << 3) #define CNTHCTL_EVNTI (0xF << 4) +#define CNTHCTL_ECV (1 << 12) enum arch_timer_reg { ARCH_TIMER_REG_CTRL, diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 70d47c4adc6a..2dd0fd2406fb 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -34,6 +34,8 @@ struct arch_timer_offset { struct arch_timer_vm_data { /* Offset applied to the virtual timer/counter */ u64 voffset; + /* Offset applied to the physical timer/counter */ + u64 poffset; }; struct arch_timer_context { -- cgit From c605ee245097d02ed5933e63ac601a8571712457 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:45 +0100 Subject: KVM: arm64: timers: Allow physical offset without CNTPOFF_EL2 CNTPOFF_EL2 is awesome, but it is mostly vapourware, and no publicly available implementation has it. So for the common mortals, let's implement the emulated version of this thing. It means trapping accesses to the physical counter and timer, and emulate some of it as necessary. As for CNTPOFF_EL2, nobody sets the offset yet. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-6-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 2 + arch/arm64/kvm/arch_timer.c | 98 +++++++++++++++++++++++++++++--------- arch/arm64/kvm/hyp/nvhe/timer-sr.c | 18 ++++--- arch/arm64/kvm/sys_regs.c | 9 ++++ 4 files changed, 98 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e3ecba3c4e6..f8da9e1b0c11 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -388,6 +388,7 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1) #define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) #define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) @@ -400,6 +401,7 @@ #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 3118ea0a1b41..bb64a71ae193 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -458,6 +458,8 @@ static void timer_save_state(struct arch_timer_context *ctx) goto out; switch (index) { + u64 cval; + case TIMER_VTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); @@ -485,7 +487,12 @@ static void timer_save_state(struct arch_timer_context *ctx) break; case TIMER_PTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); - timer_set_cval(ctx, read_sysreg_el0(SYS_CNTP_CVAL)); + cval = read_sysreg_el0(SYS_CNTP_CVAL); + + if (!has_cntpoff()) + cval -= timer_get_offset(ctx); + + timer_set_cval(ctx, cval); /* Disable the timer */ write_sysreg_el0(0, SYS_CNTP_CTL); @@ -555,6 +562,8 @@ static void timer_restore_state(struct arch_timer_context *ctx) goto out; switch (index) { + u64 cval, offset; + case TIMER_VTIMER: set_cntvoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); @@ -562,8 +571,12 @@ static void timer_restore_state(struct arch_timer_context *ctx) write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: - set_cntpoff(timer_get_offset(ctx)); - write_sysreg_el0(timer_get_cval(ctx), SYS_CNTP_CVAL); + cval = timer_get_cval(ctx); + offset = timer_get_offset(ctx); + set_cntpoff(offset); + if (!has_cntpoff()) + cval += offset; + write_sysreg_el0(cval, SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); break; @@ -634,6 +647,61 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } +/* If _pred is true, set bit in _set, otherwise set it in _clr */ +#define assign_clear_set_bit(_pred, _bit, _clr, _set) \ + do { \ + if (_pred) \ + (_set) |= (_bit); \ + else \ + (_clr) |= (_bit); \ + } while (0) + +static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) +{ + bool tpt, tpc; + u64 clr, set; + + /* + * No trapping gets configured here with nVHE. See + * __timer_enable_traps(), which is where the stuff happens. + */ + if (!has_vhe()) + return; + + /* + * Our default policy is not to trap anything. As we progress + * within this function, reality kicks in and we start adding + * traps based on emulation requirements. + */ + tpt = tpc = false; + + /* + * We have two possibility to deal with a physical offset: + * + * - Either we have CNTPOFF (yay!) or the offset is 0: + * we let the guest freely access the HW + * + * - or neither of these condition apply: + * we trap accesses to the HW, but still use it + * after correcting the physical offset + */ + if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) + tpt = tpc = true; + + /* + * Now that we have collected our requirements, compute the + * trap and enable bits. + */ + set = 0; + clr = 0; + + assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr); + assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr); + + /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */ + sysreg_clear_set(cntkctl_el1, clr, set); +} + void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -657,9 +725,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) timer_restore_state(map.direct_vtimer); if (map.direct_ptimer) timer_restore_state(map.direct_ptimer); - if (map.emul_ptimer) timer_emulate(map.emul_ptimer); + + timer_set_traps(vcpu, &map); } bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) @@ -1292,28 +1361,11 @@ no_vgic: return 0; } -/* - * On VHE system, we only need to configure the EL2 timer trap register once, - * not for every world switch. - * The host kernel runs at EL2 with HCR_EL2.TGE == 1, - * and this makes those bits have no effect for the host kernel execution. - */ +/* If we have CNTPOFF, permanently set ECV to enable it */ void kvm_timer_init_vhe(void) { - /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ - u32 cnthctl_shift = 10; - u64 val; - - /* - * VHE systems allow the guest direct access to the EL1 physical - * timer/counter. - */ - val = read_sysreg(cnthctl_el2); - val |= (CNTHCTL_EL1PCEN << cnthctl_shift); - val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)) - val |= CNTHCTL_ECV; - write_sysreg(val, cnthctl_el2); + sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); } static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) diff --git a/arch/arm64/kvm/hyp/nvhe/timer-sr.c b/arch/arm64/kvm/hyp/nvhe/timer-sr.c index 9072e71693ba..b185ac0dbd47 100644 --- a/arch/arm64/kvm/hyp/nvhe/timer-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/timer-sr.c @@ -9,6 +9,7 @@ #include #include +#include void __kvm_timer_set_cntvoff(u64 cntvoff) { @@ -35,14 +36,19 @@ void __timer_disable_traps(struct kvm_vcpu *vcpu) */ void __timer_enable_traps(struct kvm_vcpu *vcpu) { - u64 val; + u64 clr = 0, set = 0; /* * Disallow physical timer access for the guest - * Physical counter access is allowed + * Physical counter access is allowed if no offset is enforced + * or running protected (we don't offset anything in this case). */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); + clr = CNTHCTL_EL1PCEN; + if (is_protected_kvm_enabled() || + !kern_hyp_va(vcpu->kvm)->arch.timer_data.poffset) + set |= CNTHCTL_EL1PCTEN; + else + clr |= CNTHCTL_EL1PCTEN; + + sysreg_clear_set(cnthctl_el2, clr, set); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 53749d3a0996..be7c2598e563 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1139,6 +1139,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, tmr = TIMER_PTIMER; treg = TIMER_REG_CVAL; break; + case SYS_CNTPCT_EL0: + case SYS_CNTPCTSS_EL0: + case SYS_AARCH32_CNTPCT: + tmr = TIMER_PTIMER; + treg = TIMER_REG_CNT; + break; default: print_sys_reg_msg(p, "%s", "Unhandled trapped timer register"); kvm_inject_undefined(vcpu); @@ -2075,6 +2081,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), + { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, @@ -2525,6 +2533,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, { CP15_PMU_SYS_REG(DIRECT, 0, 0, 9, 0), .access = access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ + { SYS_DESC(SYS_AARCH32_CNTPCT), access_arch_timer }, { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ -- cgit From 96906a9150a86a86b0464939625279b8e19f6e88 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:46 +0100 Subject: KVM: arm64: Expose {un,}lock_all_vcpus() to the rest of KVM Being able to lock/unlock all vcpus in one go is a feature that only the vgic has enjoyed so far. Let's be brave and expose it to the world. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-7-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/arm.c | 43 +++++++++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic-kvm-device.c | 38 ------------------------------- arch/arm64/kvm/vgic/vgic.h | 3 --- 4 files changed, 46 insertions(+), 41 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bcd774d74f34..002a10cbade2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -922,6 +922,9 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); int __init kvm_sys_reg_table_init(void); +bool lock_all_vcpus(struct kvm *kvm); +void unlock_all_vcpus(struct kvm *kvm); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..ae5110cc3bad 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1484,6 +1484,49 @@ long kvm_arch_vm_ioctl(struct file *filp, } } +/* unlocks vcpus from @vcpu_lock_idx and smaller */ +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) +{ + struct kvm_vcpu *tmp_vcpu; + + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&tmp_vcpu->mutex); + } +} + +void unlock_all_vcpus(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->lock); + + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); +} + +/* Returns true if all vcpus were locked, false otherwise */ +bool lock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *tmp_vcpu; + unsigned long c; + + lockdep_assert_held(&kvm->lock); + + /* + * Any time a vcpu is in an ioctl (including running), the + * core KVM code tries to grab the vcpu->mutex. + * + * By grabbing the vcpu->mutex of all VCPUs we ensure that no + * other VCPUs can fiddle with the state while we access it. + */ + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { + if (!mutex_trylock(&tmp_vcpu->mutex)) { + unlock_vcpus(kvm, c - 1); + return false; + } + } + + return true; +} + static unsigned long nvhe_percpu_size(void) { return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) - diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index edeac2380591..04dd68835b3f 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -342,44 +342,6 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, return 0; } -/* unlocks vcpus from @vcpu_lock_idx and smaller */ -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -/* Returns true if all vcpus were locked, false otherwise */ -bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run and fiddle with the vgic state while we - * access it. - */ - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} - /** * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state * diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 7f7f3c5ed85a..f9923beedd27 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -273,9 +273,6 @@ int vgic_init(struct kvm *kvm); void vgic_debug_init(struct kvm *kvm); void vgic_debug_destroy(struct kvm *kvm); -bool lock_all_vcpus(struct kvm *kvm); -void unlock_all_vcpus(struct kvm *kvm); - static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) { struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; -- cgit From 30ec7997d175cd689fc61bfc4059f4d35b11858c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:47 +0100 Subject: KVM: arm64: timers: Allow userspace to set the global counter offset And this is the moment you have all been waiting for: setting the counter offset from userspace. We expose a brand new capability that reports the ability to set the offset for both the virtual and physical sides. In keeping with the architecture, the offset is expressed as a delta that is substracted from the physical counter value. Once this new API is used, there is no going back, and the counters cannot be written to to set the offsets implicitly (the writes are instead ignored). Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-8-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 +++ arch/arm64/include/uapi/asm/kvm.h | 9 +++++++ arch/arm64/kvm/arch_timer.c | 54 +++++++++++++++++++++++++++++++++++---- arch/arm64/kvm/arm.c | 8 ++++++ include/uapi/linux/kvm.h | 3 +++ 5 files changed, 73 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 002a10cbade2..116233a390e9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -221,6 +221,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_EL1_32BIT 4 /* PSCI SYSTEM_SUSPEND enabled for the guest */ #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 + /* VM counter offset */ +#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 unsigned long flags; @@ -1010,6 +1012,8 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, struct kvm_arm_copy_mte_tags *copy_tags); +int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + struct kvm_arm_counter_offset *offset); /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f8129c624b07..12fb0d8a760a 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */ +struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; +}; + #define KVM_ARM_TAGS_TO_GUEST 0 #define KVM_ARM_TAGS_FROM_GUEST 1 diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index bb64a71ae193..771504c79711 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -851,9 +851,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) ptimer->vcpu = vcpu; ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset; - /* Synchronize cntvoff across all vtimers of a VM. */ - timer_set_offset(vtimer, kvm_phys_timer_read()); - timer_set_offset(ptimer, 0); + /* Synchronize offsets across timers of a VM if not already provided */ + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { + timer_set_offset(vtimer, kvm_phys_timer_read()); + timer_set_offset(ptimer, 0); + } hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; @@ -897,8 +899,11 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; case KVM_REG_ARM_TIMER_CNT: - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, + &vcpu->kvm->arch.flags)) { + timer = vcpu_vtimer(vcpu); + timer_set_offset(timer, kvm_phys_timer_read() - value); + } break; case KVM_REG_ARM_TIMER_CVAL: timer = vcpu_vtimer(vcpu); @@ -908,6 +913,13 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) timer = vcpu_ptimer(vcpu); kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); break; + case KVM_REG_ARM_PTIMER_CNT: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, + &vcpu->kvm->arch.flags)) { + timer = vcpu_ptimer(vcpu); + timer_set_offset(timer, kvm_phys_timer_read() - value); + } + break; case KVM_REG_ARM_PTIMER_CVAL: timer = vcpu_ptimer(vcpu); kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); @@ -1443,3 +1455,35 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } + +int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, + struct kvm_arm_counter_offset *offset) +{ + int ret = 0; + + if (offset->reserved) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (lock_all_vcpus(kvm)) { + set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags); + + /* + * If userspace decides to set the offset using this + * API rather than merely restoring the counter + * values, the offset applies to both the virtual and + * physical views. + */ + kvm->arch.timer_data.voffset = offset->counter_offset; + kvm->arch.timer_data.poffset = offset->counter_offset; + + unlock_all_vcpus(kvm); + } else { + ret = -EBUSY; + } + + mutex_unlock(&kvm->lock); + + return ret; +} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ae5110cc3bad..1c8a4bbae684 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -220,6 +220,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: case KVM_CAP_ARM_SYSTEM_SUSPEND: + case KVM_CAP_COUNTER_OFFSET: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -1479,6 +1480,13 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); } + case KVM_ARM_SET_COUNTER_OFFSET: { + struct kvm_arm_counter_offset offset; + + if (copy_from_user(&offset, argp, sizeof(offset))) + return -EFAULT; + return kvm_vm_ioctl_set_counter_offset(kvm, &offset); + } default: return -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d77aef872a0a..6a7e1a0ecf04 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1184,6 +1184,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 +#define KVM_CAP_COUNTER_OFFSET 227 #ifdef KVM_CAP_IRQ_ROUTING @@ -1543,6 +1544,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) +/* Available with KVM_CAP_COUNTER_OFFSET */ +#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit From 680232a94c1289aad25ffae02f2785823763b456 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:48 +0100 Subject: KVM: arm64: timers: Allow save/restoring of the physical timer Nothing like being 10 year late to a party! Now that userspace can set counter offsets, we can save/restore the physical timer as well! Nobody really cared so far, but you're welcome anyway. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-9-maz@kernel.org --- arch/arm64/kvm/guest.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07444fa22888..46e910819de6 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -590,11 +590,16 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -/** - * ARM64 versions of the TIMER registers, always available on arm64 - */ +static const u64 timer_reg_list[] = { + KVM_REG_ARM_TIMER_CTL, + KVM_REG_ARM_TIMER_CNT, + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_PTIMER_CTL, + KVM_REG_ARM_PTIMER_CNT, + KVM_REG_ARM_PTIMER_CVAL, +}; -#define NUM_TIMER_REGS 3 +#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) static bool is_timer_reg(u64 index) { @@ -602,6 +607,9 @@ static bool is_timer_reg(u64 index) case KVM_REG_ARM_TIMER_CTL: case KVM_REG_ARM_TIMER_CNT: case KVM_REG_ARM_TIMER_CVAL: + case KVM_REG_ARM_PTIMER_CTL: + case KVM_REG_ARM_PTIMER_CNT: + case KVM_REG_ARM_PTIMER_CVAL: return true; } return false; @@ -609,14 +617,11 @@ static bool is_timer_reg(u64 index) static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) - return -EFAULT; - uindices++; - if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) - return -EFAULT; + for (int i = 0; i < NUM_TIMER_REGS; i++) { + if (put_user(timer_reg_list[i], uindices)) + return -EFAULT; + uindices++; + } return 0; } -- cgit From 5591805d2c21b70838b723b71b8ff613de51cfff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:49 +0100 Subject: KVM: arm64: timers: Rationalise per-vcpu timer init The way we initialise our timer contexts may be satisfactory for two timers, but will be getting pretty annoying with four. Cleanup the whole thing by removing the code duplication and getting rid of unused IRQ configuration elements. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-10-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 73 +++++++++++++++++++++++--------------------- include/kvm/arm_arch_timer.h | 1 - 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 771504c79711..e46f04ed8f86 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -30,14 +30,9 @@ static u32 host_ptimer_irq_flags; static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); -static const struct kvm_irq_level default_ptimer_irq = { - .irq = 30, - .level = 1, -}; - -static const struct kvm_irq_level default_vtimer_irq = { - .irq = 27, - .level = 1, +static const u8 default_ppi[] = { + [TIMER_PTIMER] = 30, + [TIMER_VTIMER] = 27, }; static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); @@ -820,12 +815,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - timer_set_ctl(vcpu_vtimer(vcpu), 0); - timer_set_ctl(vcpu_ptimer(vcpu), 0); + for (int i = 0; i < NR_KVM_TIMERS; i++) + timer_set_ctl(vcpu_get_timer(vcpu, i), 0); + if (timer->enabled) { - kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); - kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); + for (int i = 0; i < NR_KVM_TIMERS; i++) + kvm_timer_update_irq(vcpu, false, + vcpu_get_timer(vcpu, i)); if (irqchip_in_kernel(vcpu->kvm)) { kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); @@ -840,39 +837,47 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) return 0; } +static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) +{ + struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); + struct kvm *kvm = vcpu->kvm; + + ctxt->vcpu = vcpu; + + if (timerid == TIMER_VTIMER) + ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; + else + ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset; + + hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + ctxt->hrtimer.function = kvm_hrtimer_expire; + ctxt->irq.irq = default_ppi[timerid]; + + switch (timerid) { + case TIMER_PTIMER: + ctxt->host_timer_irq = host_ptimer_irq; + break; + case TIMER_VTIMER: + ctxt->host_timer_irq = host_vtimer_irq; + break; + } +} + void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - vtimer->vcpu = vcpu; - vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset; - ptimer->vcpu = vcpu; - ptimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.poffset; + for (int i = 0; i < NR_KVM_TIMERS; i++) + timer_context_init(vcpu, i); /* Synchronize offsets across timers of a VM if not already provided */ if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) { - timer_set_offset(vtimer, kvm_phys_timer_read()); - timer_set_offset(ptimer, 0); + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read()); + timer_set_offset(vcpu_ptimer(vcpu), 0); } hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); timer->bg_timer.function = kvm_bg_timer_expire; - - hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - vtimer->hrtimer.function = kvm_hrtimer_expire; - ptimer->hrtimer.function = kvm_hrtimer_expire; - - vtimer->irq.irq = default_vtimer_irq.irq; - ptimer->irq.irq = default_ptimer_irq.irq; - - vtimer->host_timer_irq = host_vtimer_irq; - ptimer->host_timer_irq = host_ptimer_irq; - - vtimer->host_timer_irq_flags = host_vtimer_irq_flags; - ptimer->host_timer_irq_flags = host_ptimer_irq_flags; } void kvm_timer_cpu_up(void) diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 2dd0fd2406fb..c746ef64220b 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -59,7 +59,6 @@ struct arch_timer_context { /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; - u32 host_timer_irq_flags; }; struct timer_map { -- cgit From 33c549460ef9119eb115484e81f54521122341db Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:50 +0100 Subject: KVM: arm64: timers: Abstract per-timer IRQ access As we are about to move the location of the per-timer IRQ into the VM structure, abstract the location of the IRQ behind an accessor. This will make the repainting sligntly less painful. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-11-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 38 +++++++++++++++++++------------------- include/kvm/arm_arch_timer.h | 2 ++ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e46f04ed8f86..d08d8c2fc30d 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -392,12 +392,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, int ret; timer_ctx->irq.level = new_level; - trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), timer_ctx->irq.level); if (!userspace_irqchip(vcpu->kvm)) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - timer_ctx->irq.irq, + timer_irq(timer_ctx), timer_ctx->irq.level, timer_ctx); WARN_ON(ret); @@ -607,7 +607,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) - phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); + phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); phys_active |= ctx->irq.level; @@ -825,9 +825,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) vcpu_get_timer(vcpu, i)); if (irqchip_in_kernel(vcpu->kvm)) { - kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer)); if (map.direct_ptimer) - kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer)); } } @@ -851,7 +851,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ctxt->hrtimer.function = kvm_hrtimer_expire; - ctxt->irq.irq = default_ppi[timerid]; + timer_irq(ctxt) = default_ppi[timerid]; switch (timerid) { case TIMER_PTIMER: @@ -1295,19 +1295,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) int vtimer_irq, ptimer_irq, ret; unsigned long i; - vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); if (ret) return false; - ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); if (ret) return false; kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || - vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || + timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) return false; } @@ -1322,9 +1322,9 @@ bool kvm_arch_timer_get_input_level(int vintid) if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == vcpu_vtimer(vcpu)->irq.irq) + if (vintid == timer_irq(vcpu_vtimer(vcpu))) timer = vcpu_vtimer(vcpu); - else if (vintid == vcpu_ptimer(vcpu)->irq.irq) + else if (vintid == timer_irq(vcpu_ptimer(vcpu))) timer = vcpu_ptimer(vcpu); else BUG(); @@ -1358,7 +1358,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, - map.direct_vtimer->irq.irq, + timer_irq(map.direct_vtimer), &arch_timer_irq_ops); if (ret) return ret; @@ -1366,7 +1366,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (map.direct_ptimer) { ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, - map.direct_ptimer->irq.irq, + timer_irq(map.direct_ptimer), &arch_timer_irq_ops); } @@ -1391,8 +1391,8 @@ static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) unsigned long i; kvm_for_each_vcpu(i, vcpu, kvm) { - vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; - vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + timer_irq(vcpu_vtimer(vcpu)) = vtimer_irq; + timer_irq(vcpu_ptimer(vcpu)) = ptimer_irq; } } @@ -1417,10 +1417,10 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + set_timer_irqs(vcpu->kvm, irq, timer_irq(ptimer)); break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + set_timer_irqs(vcpu->kvm, timer_irq(vtimer), irq); break; default: return -ENXIO; @@ -1446,7 +1446,7 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } - irq = timer->irq.irq; + irq = timer_irq(timer); return put_user(irq, uaddr); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index c746ef64220b..27cada09f588 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -109,6 +109,8 @@ bool kvm_arch_timer_get_input_level(int vintid); #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) +#define timer_irq(ctx) ((ctx)->irq.irq) + u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, enum kvm_arch_timer_regs treg); -- cgit From 8a5eb2d210807e7dbe9ece7075533014cf4b9c27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:51 +0100 Subject: KVM: arm64: timers: Move the timer IRQs into arch_timer_vm_data Having the timer IRQs duplicated into each vcpu isn't great, and becomes absolutely awful with NV. So let's move these into the per-VM arch_timer_vm_data structure. This simplifies a lot of code, but requires us to introduce a mutex so that we can reason about userspace trying to change an interrupt number while another vcpu is running, something that wasn't really well handled so far. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-12-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 2 + arch/arm64/kvm/arch_timer.c | 108 ++++++++++++++++++++++---------------- arch/arm64/kvm/arm.c | 2 + include/kvm/arm_arch_timer.h | 18 +++++-- 4 files changed, 82 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 116233a390e9..1280154c9ef3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -223,6 +223,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 /* VM counter offset */ #define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6 + /* Timer PPIs made immutable */ +#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7 unsigned long flags; diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d08d8c2fc30d..1d811735e05f 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -851,7 +851,6 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); ctxt->hrtimer.function = kvm_hrtimer_expire; - timer_irq(ctxt) = default_ppi[timerid]; switch (timerid) { case TIMER_PTIMER: @@ -880,6 +879,13 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) timer->bg_timer.function = kvm_bg_timer_expire; } +void kvm_timer_init_vm(struct kvm *kvm) +{ + mutex_init(&kvm->arch.timer_data.lock); + for (int i = 0; i < NR_KVM_TIMERS; i++) + kvm->arch.timer_data.ppi[i] = default_ppi[i]; +} + void kvm_timer_cpu_up(void) { enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); @@ -1292,44 +1298,56 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) { - int vtimer_irq, ptimer_irq, ret; - unsigned long i; + u32 ppis = 0; + bool valid; - vtimer_irq = timer_irq(vcpu_vtimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); - if (ret) - return false; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); - ptimer_irq = timer_irq(vcpu_ptimer(vcpu)); - ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); - if (ret) - return false; + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + int irq; - kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { - if (timer_irq(vcpu_vtimer(vcpu)) != vtimer_irq || - timer_irq(vcpu_ptimer(vcpu)) != ptimer_irq) - return false; + ctx = vcpu_get_timer(vcpu, i); + irq = timer_irq(ctx); + if (kvm_vgic_set_owner(vcpu, irq, ctx)) + break; + + /* + * We know by construction that we only have PPIs, so + * all values are less than 32. + */ + ppis |= BIT(irq); } - return true; + valid = hweight32(ppis) == NR_KVM_TIMERS; + + if (valid) + set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); + + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); + + return valid; } bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); - struct arch_timer_context *timer; if (WARN(!vcpu, "No vcpu context!\n")) return false; - if (vintid == timer_irq(vcpu_vtimer(vcpu))) - timer = vcpu_vtimer(vcpu); - else if (vintid == timer_irq(vcpu_ptimer(vcpu))) - timer = vcpu_ptimer(vcpu); - else - BUG(); + for (int i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx; + + ctx = vcpu_get_timer(vcpu, i); + if (timer_irq(ctx) == vintid) + return kvm_timer_should_fire(ctx); + } - return kvm_timer_should_fire(timer); + /* A timer IRQ has fired, but no matching timer was found? */ + WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid); + + return false; } int kvm_timer_enable(struct kvm_vcpu *vcpu) @@ -1385,23 +1403,10 @@ void kvm_timer_init_vhe(void) sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV); } -static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) -{ - struct kvm_vcpu *vcpu; - unsigned long i; - - kvm_for_each_vcpu(i, vcpu, kvm) { - timer_irq(vcpu_vtimer(vcpu)) = vtimer_irq; - timer_irq(vcpu_ptimer(vcpu)) = ptimer_irq; - } -} - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { int __user *uaddr = (int __user *)(long)attr->addr; - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - int irq; + int irq, idx, ret = 0; if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; @@ -1412,21 +1417,36 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!(irq_is_ppi(irq))) return -EINVAL; - if (vcpu->arch.timer_cpu.enabled) - return -EBUSY; + mutex_lock(&vcpu->kvm->arch.timer_data.lock); + + if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, + &vcpu->kvm->arch.flags)) { + ret = -EBUSY; + goto out; + } switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: - set_timer_irqs(vcpu->kvm, irq, timer_irq(ptimer)); + idx = TIMER_VTIMER; break; case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: - set_timer_irqs(vcpu->kvm, timer_irq(vtimer), irq); + idx = TIMER_PTIMER; break; default: - return -ENXIO; + ret = -ENXIO; + goto out; } - return 0; + /* + * We cannot validate the IRQ unicity before we run, so take it at + * face value. The verdict will be given on first vcpu run, for each + * vcpu. Yes this is late. Blame it on the stupid API. + */ + vcpu->kvm->arch.timer_data.ppi[idx] = irq; + +out: + mutex_unlock(&vcpu->kvm->arch.timer_data.lock); + return ret; } int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1c8a4bbae684..4c5e9dfbf83a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -148,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_vgic_early_init(kvm); + kvm_timer_init_vm(kvm); + /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->max_vcpus = kvm_arm_default_max_vcpus(); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 27cada09f588..f093ea9f540d 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -36,14 +36,16 @@ struct arch_timer_vm_data { u64 voffset; /* Offset applied to the physical timer/counter */ u64 poffset; + + struct mutex lock; + + /* The PPI for each timer, global to the VM */ + u8 ppi[NR_KVM_TIMERS]; }; struct arch_timer_context { struct kvm_vcpu *vcpu; - /* Timer IRQ */ - struct kvm_irq_level irq; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; u64 ns_frac; @@ -57,6 +59,11 @@ struct arch_timer_context { */ bool loaded; + /* Output level of the timer IRQ */ + struct { + bool level; + } irq; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; }; @@ -86,6 +93,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); void kvm_timer_update_run(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); +void kvm_timer_init_vm(struct kvm *kvm); + u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); @@ -109,7 +118,8 @@ bool kvm_arch_timer_get_input_level(int vintid); #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) -#define timer_irq(ctx) ((ctx)->irq.irq) +#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) +#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, -- cgit From 1a6511eb8430533920559c5f01f487f4901081cd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:52 +0100 Subject: KVM: arm64: Elide kern_hyp_va() in VHE-specific parts of the hypervisor For VHE-specific hypervisor code, kern_hyp_va() is a NOP. Actually, it is a whole range of NOPs. It'd be much better if this code simply didn't exist. Let's just do that. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-13-maz@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 083cc47dca08..27e63c111f78 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -63,6 +63,7 @@ * specific registers encoded in the instructions). */ .macro kern_hyp_va reg +#ifndef __KVM_VHE_HYPERVISOR__ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask and \reg, \reg, #1 /* mask with va_mask */ ror \reg, \reg, #1 /* rotate to the first tag bit */ @@ -70,6 +71,7 @@ alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ ror \reg, \reg, #63 /* rotate back */ alternative_cb_end +#endif .endm /* @@ -127,6 +129,7 @@ void kvm_apply_hyp_relocations(void); static __always_inline unsigned long __kern_hyp_va(unsigned long v) { +#ifndef __KVM_VHE_HYPERVISOR__ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" "ror %0, %0, #1\n" "add %0, %0, #0\n" @@ -135,6 +138,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) ARM64_ALWAYS_SYSTEM, kvm_update_va_mask) : "+r" (v)); +#endif return v; } -- cgit From e9adde432bf7371f1c83f67d9f8d75b95810f124 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:53 +0100 Subject: KVM: arm64: timers: Fast-track CNTPCT_EL0 trap handling Now that it is likely that CNTPCT_EL0 accesses will trap, fast-track the emulation of the counter read which doesn't need more that a simple offsetting. One day, we'll have CNTPOFF everywhere. One day. Suggested-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-14-maz@kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 07d37ff88a3f..9954368f639d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -326,6 +327,38 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } +static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *ctxt; + u32 sysreg; + u64 val; + + /* + * We only get here for 64bit guests, 32bit guests will hit + * the long and winding road all the way to the standard + * handling. Yes, it sucks to be irrelevant. + */ + sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + + switch (sysreg) { + case SYS_CNTPCT_EL0: + case SYS_CNTPCTSS_EL0: + ctxt = vcpu_ptimer(vcpu); + break; + default: + return false; + } + + val = arch_timer_read_cntpct_el0(); + + if (ctxt->offset.vm_offset) + val -= *kern_hyp_va(ctxt->offset.vm_offset); + + vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); + __kvm_skip_instr(vcpu); + return true; +} + static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) { if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && @@ -339,6 +372,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) return kvm_hyp_handle_ptrauth(vcpu, exit_code); + if (kvm_hyp_handle_cntpct(vcpu)) + return true; + return false; } -- cgit From 476fcd4b7bb54ac959b683f30d0cf305c3e11f3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:54 +0100 Subject: KVM: arm64: timers: Abstract the number of valid timers per vcpu We so far have a pretty fixed number of timers to take care of. This is about to change as NV brings another two into the picture, and we must be careful not to try and emulate non-valid timers in a given VM. For this, abstract the number of timers for a given vcpu behind an accessor, which helpfully returns a constant for now. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-15-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 1d811735e05f..d3a7902269c1 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -52,6 +52,11 @@ static bool has_cntpoff(void) return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); } +static int nr_timers(struct kvm_vcpu *vcpu) +{ + return NR_KVM_TIMERS; +} + u32 timer_get_ctl(struct arch_timer_context *ctxt) { struct kvm_vcpu *vcpu = ctxt->vcpu; @@ -255,7 +260,7 @@ static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) u64 min_delta = ULLONG_MAX; int i; - for (i = 0; i < NR_KVM_TIMERS; i++) { + for (i = 0; i < nr_timers(vcpu); i++) { struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; WARN(ctx->loaded, "timer %d loaded\n", i); @@ -815,12 +820,12 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) * resets the timer to be disabled and unmasked and is compliant with * the ARMv7 architecture. */ - for (int i = 0; i < NR_KVM_TIMERS; i++) + for (int i = 0; i < nr_timers(vcpu); i++) timer_set_ctl(vcpu_get_timer(vcpu, i), 0); if (timer->enabled) { - for (int i = 0; i < NR_KVM_TIMERS; i++) + for (int i = 0; i < nr_timers(vcpu); i++) kvm_timer_update_irq(vcpu, false, vcpu_get_timer(vcpu, i)); @@ -1303,7 +1308,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->kvm->arch.timer_data.lock); - for (int i = 0; i < NR_KVM_TIMERS; i++) { + for (int i = 0; i < nr_timers(vcpu); i++) { struct arch_timer_context *ctx; int irq; @@ -1319,7 +1324,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) ppis |= BIT(irq); } - valid = hweight32(ppis) == NR_KVM_TIMERS; + valid = hweight32(ppis) == nr_timers(vcpu); if (valid) set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags); @@ -1336,7 +1341,7 @@ bool kvm_arch_timer_get_input_level(int vintid) if (WARN(!vcpu, "No vcpu context!\n")) return false; - for (int i = 0; i < NR_KVM_TIMERS; i++) { + for (int i = 0; i < nr_timers(vcpu); i++) { struct arch_timer_context *ctx; ctx = vcpu_get_timer(vcpu, i); -- cgit From 1935d34afaebe01ddb75bfaa62fb7fe957ddc210 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:55 +0100 Subject: KVM: arm64: Document KVM_ARM_SET_CNT_OFFSETS and co Add some basic documentation on the effects of KVM_ARM_SET_CNT_OFFSETS. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-16-maz@kernel.org --- Documentation/virt/kvm/api.rst | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 62de0768d6aa..192adcb61add 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6029,6 +6029,44 @@ delivery must be provided via the "reg_aen" struct. The "pad" and "reserved" fields may be used for future extensions and should be set to 0s by userspace. +4.138 KVM_ARM_SET_COUNTER_OFFSET +-------------------------------- + +:Capability: KVM_CAP_COUNTER_OFFSET +:Architectures: arm64 +:Type: vm ioctl +:Parameters: struct kvm_arm_counter_offset (in) +:Returns: 0 on success, < 0 on error + +This capability indicates that userspace is able to apply a single VM-wide +offset to both the virtual and physical counters as viewed by the guest +using the KVM_ARM_SET_CNT_OFFSET ioctl and the following data structure: + +:: + + struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; + }; + +The offset describes a number of counter cycles that are subtracted from +both virtual and physical counter views (similar to the effects of the +CNTVOFF_EL2 and CNTPOFF_EL2 system registers, but only global). The offset +always applies to all vcpus (already created or created after this ioctl) +for this VM. + +It is userspace's responsibility to compute the offset based, for example, +on previous values of the guest counters. + +Any value other than 0 for the "reserved" field may result in an error +(-EINVAL) being returned. This ioctl can also return -EBUSY if any vcpu +ioctl is issued concurrently. + +Note that using this ioctl results in KVM ignoring subsequent userspace +writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG +interface. No error will be returned, but the resulting offset will not be +applied. + 5. The kvm_run structure ======================== -- cgit From 1e0eec09d43a55125ff80e40b2d6e2f369a338b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:56 +0100 Subject: KVM: arm64: nv: timers: Add a per-timer, per-vcpu offset Being able to set a global offset isn't enough. With NV, we also need to a per-vcpu, per-timer offset (for example, CNTVCT_EL0 being offset by CNTVOFF_EL2). Use a similar method as the VM-wide offset to have a timer point to the shadow register that contains the offset value. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-17-maz@kernel.org --- arch/arm64/kvm/arch_timer.c | 13 ++++++++++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++ include/kvm/arm_arch_timer.h | 5 +++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d3a7902269c1..b87bf182af33 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -89,10 +89,17 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static u64 timer_get_offset(struct arch_timer_context *ctxt) { - if (ctxt && ctxt->offset.vm_offset) - return *ctxt->offset.vm_offset; + u64 offset = 0; - return 0; + if (!ctxt) + return 0; + + if (ctxt->offset.vm_offset) + offset += *ctxt->offset.vm_offset; + if (ctxt->offset.vcpu_offset) + offset += *ctxt->offset.vcpu_offset; + + return offset; } static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 9954368f639d..d07cbc313889 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -353,6 +353,8 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) if (ctxt->offset.vm_offset) val -= *kern_hyp_va(ctxt->offset.vm_offset); + if (ctxt->offset.vcpu_offset) + val -= *kern_hyp_va(ctxt->offset.vcpu_offset); vcpu_set_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu), val); __kvm_skip_instr(vcpu); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f093ea9f540d..209da0c2ac9f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -29,6 +29,11 @@ struct arch_timer_offset { * structure. If NULL, assume a zero offset. */ u64 *vm_offset; + /* + * If set, pointer to one of the offsets in the vcpu's sysreg + * array. If NULL, assume a zero offset. + */ + u64 *vcpu_offset; }; struct arch_timer_vm_data { -- cgit From 81dc9504a7006b484cfcf074796094ee526b0c45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:57 +0100 Subject: KVM: arm64: nv: timers: Support hyp timer emulation Emulating EL2 also means emulating the EL2 timers. To do so, we expand our timer framework to deal with at most 4 timers. At any given time, two timers are using the HW timers, and the two others are purely emulated. The role of deciding which is which at any given time is left to a mapping function which is called every time we need to make such a decision. Reviewed-by: Colton Lewis Co-developed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-18-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 4 + arch/arm64/include/uapi/asm/kvm.h | 2 + arch/arm64/kvm/arch_timer.c | 180 ++++++++++++++++++++++++++++++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 15 +++ arch/arm64/kvm/trace_arm.h | 6 +- arch/arm64/kvm/vgic/vgic.c | 15 +++ include/kvm/arm_arch_timer.h | 9 +- include/kvm/arm_vgic.h | 1 + 8 files changed, 220 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1280154c9ef3..633a7c0750bb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -369,6 +369,10 @@ enum vcpu_sysreg { TPIDR_EL2, /* EL2 Software Thread ID Register */ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */ SP_EL2, /* EL2 Stack Pointer */ + CNTHP_CTL_EL2, + CNTHP_CVAL_EL2, + CNTHV_CTL_EL2, + CNTHV_CVAL_EL2, NR_SYS_REGS /* Nothing after this line! */ }; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 12fb0d8a760a..0921f366c49f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -420,6 +420,8 @@ enum { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index b87bf182af33..c5c8cc3c25ae 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,8 @@ static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); static const u8 default_ppi[] = { [TIMER_PTIMER] = 30, [TIMER_VTIMER] = 27, + [TIMER_HPTIMER] = 26, + [TIMER_HVTIMER] = 28, }; static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); @@ -46,6 +49,11 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg); +static bool kvm_arch_timer_get_input_level(int vintid); + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; static bool has_cntpoff(void) { @@ -54,6 +62,9 @@ static bool has_cntpoff(void) static int nr_timers(struct kvm_vcpu *vcpu) { + if (!vcpu_has_nv(vcpu)) + return NR_KVM_EL0_TIMERS; + return NR_KVM_TIMERS; } @@ -66,6 +77,10 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2); default: WARN_ON(1); return 0; @@ -81,6 +96,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0); case TIMER_PTIMER: return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + case TIMER_HVTIMER: + return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2); + case TIMER_HPTIMER: + return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); default: WARN_ON(1); return 0; @@ -113,6 +132,12 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; break; + case TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + break; default: WARN_ON(1); } @@ -129,6 +154,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) case TIMER_PTIMER: __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; break; + case TIMER_HVTIMER: + __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + break; + case TIMER_HPTIMER: + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + break; default: WARN_ON(1); } @@ -151,13 +182,27 @@ u64 kvm_phys_timer_read(void) static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { - if (has_vhe()) { + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + map->direct_vtimer = vcpu_hvtimer(vcpu); + map->direct_ptimer = vcpu_hptimer(vcpu); + map->emul_vtimer = vcpu_vtimer(vcpu); + map->emul_ptimer = vcpu_ptimer(vcpu); + } else { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = vcpu_hvtimer(vcpu); + map->emul_ptimer = vcpu_hptimer(vcpu); + } + } else if (has_vhe()) { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_vtimer = NULL; map->emul_ptimer = NULL; } else { map->direct_vtimer = vcpu_vtimer(vcpu); map->direct_ptimer = NULL; + map->emul_vtimer = NULL; map->emul_ptimer = vcpu_ptimer(vcpu); } @@ -252,8 +297,11 @@ static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu) static u64 wfit_delay_ns(struct kvm_vcpu *vcpu) { - struct arch_timer_context *ctx = vcpu_vtimer(vcpu); u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + struct arch_timer_context *ctx; + + ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu) + : vcpu_vtimer(vcpu); return kvm_counter_compute_delta(ctx, val); } @@ -350,9 +398,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) switch (index) { case TIMER_VTIMER: + case TIMER_HVTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); break; case TIMER_PTIMER: + case TIMER_HPTIMER: cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); break; case NR_KVM_TIMERS: @@ -468,6 +518,7 @@ static void timer_save_state(struct arch_timer_context *ctx) u64 cval; case TIMER_VTIMER: + case TIMER_HVTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL)); timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL)); @@ -493,6 +544,7 @@ static void timer_save_state(struct arch_timer_context *ctx) set_cntvoff(0); break; case TIMER_PTIMER: + case TIMER_HPTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); cval = read_sysreg_el0(SYS_CNTP_CVAL); @@ -536,6 +588,7 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu) */ if (!kvm_timer_irq_can_fire(map.direct_vtimer) && !kvm_timer_irq_can_fire(map.direct_ptimer) && + !kvm_timer_irq_can_fire(map.emul_vtimer) && !kvm_timer_irq_can_fire(map.emul_ptimer) && !vcpu_has_wfit_active(vcpu)) return; @@ -572,12 +625,14 @@ static void timer_restore_state(struct arch_timer_context *ctx) u64 cval, offset; case TIMER_VTIMER: + case TIMER_HVTIMER: set_cntvoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); break; case TIMER_PTIMER: + case TIMER_HPTIMER: cval = timer_get_cval(ctx); offset = timer_get_offset(ctx); set_cntpoff(offset); @@ -663,6 +718,57 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) (_clr) |= (_bit); \ } while (0) +static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, + struct timer_map *map) +{ + int hw, ret; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + /* + * We only ever unmap the vtimer irq on a VHE system that runs nested + * virtualization, in which case we have both a valid emul_vtimer, + * emul_ptimer, direct_vtimer, and direct_ptimer. + * + * Since this is called from kvm_timer_vcpu_load(), a change between + * vEL2 and vEL1/0 will have just happened, and the timer_map will + * represent this, and therefore we switch the emul/direct mappings + * below. + */ + hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer)); + if (hw < 0) { + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer)); + kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer)); + + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_vtimer->host_timer_irq, + timer_irq(map->direct_vtimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + ret = kvm_vgic_map_phys_irq(vcpu, + map->direct_ptimer->host_timer_irq, + timer_irq(map->direct_ptimer), + &arch_timer_irq_ops); + WARN_ON_ONCE(ret); + + /* + * The virtual offset behaviour is "interresting", as it + * always applies when HCR_EL2.E2H==0, but only when + * accessed from EL1 when HCR_EL2.E2H==1. So make sure we + * track E2H when putting the HV timer in "direct" mode. + */ + if (map->direct_vtimer == vcpu_hvtimer(vcpu)) { + struct arch_timer_offset *offs = &map->direct_vtimer->offset; + + if (vcpu_el2_e2h_is_set(vcpu)) + offs->vcpu_offset = NULL; + else + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + } + } +} + static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) { bool tpt, tpc; @@ -695,6 +801,22 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map) if (!has_cntpoff() && timer_get_offset(map->direct_ptimer)) tpt = tpc = true; + /* + * Apply the enable bits that the guest hypervisor has requested for + * its own guest. We can only add traps that wouldn't have been set + * above. + */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + + /* Use the VHE format for mental sanity */ + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10; + + tpt |= !(val & (CNTHCTL_EL1PCEN << 10)); + tpc |= !(val & (CNTHCTL_EL1PCTEN << 10)); + } + /* * Now that we have collected our requirements, compute the * trap and enable bits. @@ -720,6 +842,9 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); if (static_branch_likely(&has_gic_active_state)) { + if (vcpu_has_nv(vcpu)) + kvm_timer_vcpu_load_nested_switch(vcpu, &map); + kvm_timer_vcpu_load_gic(map.direct_vtimer); if (map.direct_ptimer) kvm_timer_vcpu_load_gic(map.direct_ptimer); @@ -732,6 +857,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) timer_restore_state(map.direct_vtimer); if (map.direct_ptimer) timer_restore_state(map.direct_ptimer); + if (map.emul_vtimer) + timer_emulate(map.emul_vtimer); if (map.emul_ptimer) timer_emulate(map.emul_ptimer); @@ -778,6 +905,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * In any case, we re-schedule the hrtimer for the physical timer when * coming back to the VCPU thread in kvm_timer_vcpu_load(). */ + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -830,6 +959,17 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) for (int i = 0; i < nr_timers(vcpu); i++) timer_set_ctl(vcpu_get_timer(vcpu, i), 0); + /* + * A vcpu running at EL2 is in charge of the offset applied to + * the virtual timer, so use the physical VM offset, and point + * the vcpu offset to CNTVOFF_EL2. + */ + if (vcpu_has_nv(vcpu)) { + struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; + + offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; + } if (timer->enabled) { for (int i = 0; i < nr_timers(vcpu); i++) @@ -843,6 +983,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) } } + if (map.emul_vtimer) + soft_timer_cancel(&map.emul_vtimer->hrtimer); if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -866,9 +1008,11 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) switch (timerid) { case TIMER_PTIMER: + case TIMER_HPTIMER: ctxt->host_timer_irq = host_ptimer_irq; break; case TIMER_VTIMER: + case TIMER_HVTIMER: ctxt->host_timer_irq = host_vtimer_irq; break; } @@ -1020,6 +1164,10 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, val = kvm_phys_timer_read() - timer_get_offset(timer); break; + case TIMER_REG_VOFF: + val = *timer->offset.vcpu_offset; + break; + default: BUG(); } @@ -1038,7 +1186,7 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) + if (timer == map.emul_vtimer || timer == map.emul_ptimer) return kvm_arm_timer_read(vcpu, timer, treg); preempt_disable(); @@ -1070,6 +1218,10 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, timer_set_cval(timer, val); break; + case TIMER_REG_VOFF: + *timer->offset.vcpu_offset = val; + break; + default: BUG(); } @@ -1085,7 +1237,7 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, get_timer_map(vcpu, &map); timer = vcpu_get_timer(vcpu, tmr); - if (timer == map.emul_ptimer) { + if (timer == map.emul_vtimer || timer == map.emul_ptimer) { soft_timer_cancel(&timer->hrtimer); kvm_arm_timer_write(vcpu, timer, treg, val); timer_emulate(timer); @@ -1165,10 +1317,6 @@ static const struct irq_domain_ops timer_domain_ops = { .free = timer_irq_domain_free, }; -static struct irq_ops arch_timer_irq_ops = { - .get_input_level = kvm_arch_timer_get_input_level, -}; - static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) { *flags = irq_get_trigger_type(virq); @@ -1341,7 +1489,7 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) return valid; } -bool kvm_arch_timer_get_input_level(int vintid) +static bool kvm_arch_timer_get_input_level(int vintid) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); @@ -1444,6 +1592,12 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: idx = TIMER_PTIMER; break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + idx = TIMER_HVTIMER; + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + idx = TIMER_HPTIMER; + break; default: ret = -ENXIO; goto out; @@ -1474,6 +1628,12 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: timer = vcpu_ptimer(vcpu); break; + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + timer = vcpu_hvtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: + timer = vcpu_hptimer(vcpu); + break; default: return -ENXIO; } @@ -1487,6 +1647,8 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER: return 0; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index d07cbc313889..c41166f1a1dd 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -343,6 +343,21 @@ static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu) switch (sysreg) { case SYS_CNTPCT_EL0: case SYS_CNTPCTSS_EL0: + if (vcpu_has_nv(vcpu)) { + if (is_hyp_ctxt(vcpu)) { + ctxt = vcpu_hptimer(vcpu); + break; + } + + /* Check for guest hypervisor trapping */ + val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2); + if (!vcpu_el2_e2h_is_set(vcpu)) + val = (val & CNTHCTL_EL1PCTEN) << 10; + + if (!(val & (CNTHCTL_EL1PCTEN << 10))) + return false; + } + ctxt = vcpu_ptimer(vcpu); break; default: diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index f3e46a976125..6ce5c025218d 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -206,6 +206,7 @@ TRACE_EVENT(kvm_get_timer_map, __field( unsigned long, vcpu_id ) __field( int, direct_vtimer ) __field( int, direct_ptimer ) + __field( int, emul_vtimer ) __field( int, emul_ptimer ) ), @@ -214,14 +215,17 @@ TRACE_EVENT(kvm_get_timer_map, __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); __entry->direct_ptimer = (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; + __entry->emul_vtimer = + (map->emul_vtimer) ? arch_timer_ctx_index(map->emul_vtimer) : -1; __entry->emul_ptimer = (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; ), - TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", + TP_printk("VCPU: %ld, dv: %d, dp: %d, ev: %d, ep: %d", __entry->vcpu_id, __entry->direct_vtimer, __entry->direct_ptimer, + __entry->emul_vtimer, __entry->emul_ptimer) ); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index d97e6080b421..ae491ef97188 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -573,6 +573,21 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) return 0; } +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret = -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw) + ret = irq->hwintid; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + /** * kvm_vgic_set_owner - Set the owner of an interrupt for a VM * diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 209da0c2ac9f..52008f5cff06 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -13,6 +13,9 @@ enum kvm_arch_timers { TIMER_PTIMER, TIMER_VTIMER, + NR_KVM_EL0_TIMERS, + TIMER_HVTIMER = NR_KVM_EL0_TIMERS, + TIMER_HPTIMER, NR_KVM_TIMERS }; @@ -21,6 +24,7 @@ enum kvm_arch_timer_regs { TIMER_REG_CVAL, TIMER_REG_TVAL, TIMER_REG_CTL, + TIMER_REG_VOFF, }; struct arch_timer_offset { @@ -76,6 +80,7 @@ struct arch_timer_context { struct timer_map { struct arch_timer_context *direct_vtimer; struct arch_timer_context *direct_ptimer; + struct arch_timer_context *emul_vtimer; struct arch_timer_context *emul_ptimer; }; @@ -114,12 +119,12 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu); void kvm_timer_init_vhe(void); -bool kvm_arch_timer_get_input_level(int vintid); - #define vcpu_timer(v) (&(v)->arch.timer_cpu) #define vcpu_get_timer(v,t) (&vcpu_timer(v)->timers[(t)]) #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_VTIMER]) #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_PTIMER]) +#define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER]) +#define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d3ad51fde9db..402b545959af 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -380,6 +380,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, u32 vintid, struct irq_ops *ops); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid); +int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -- cgit From 0630fb8e0a4873e436f0c1c1b27fa60a37eb960c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:58 +0100 Subject: KVM: arm64: selftests: Add physical timer registers to the sysreg list Now that KVM exposes CNTPCT_EL0, CNTP_CTL_EL0 and CNT_CVAL_EL0 to userspace, add them to the get-reg-list selftest. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-19-maz@kernel.org --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index d287dd2cac0a..1b976b333d2c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -651,7 +651,7 @@ int main(int ac, char **av) * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. * - * The blessed list is up to date with kernel version v5.13-rc3 + * The blessed list is up to date with kernel version v6.4 (or so we hope) */ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), @@ -858,6 +858,9 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ + ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ -- cgit From 056c15669a01677ba3e44456580bf4a351f71ff7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:47:59 +0100 Subject: KVM: arm64: selftests: Deal with spurious timer interrupts Make sure the timer test can properly handle a spurious timer interrupt, something that is far from being unlikely. This involves checking for the GIC IAR return value (don't bother handling the interrupt if it was spurious) as well as the timer control register (don't do anything if the interrupt is masked or the timer disabled). Take this opportunity to rewrite the timer handler in a more readable way. This solves a bunch of failures that creep up on systems that are slow to retire the interrupt, something that the GIC architecture makes no guarantee about. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-20-maz@kernel.org --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 40 +++++++++++++++--------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 26556a266021..176ab41dd01b 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -121,25 +121,35 @@ static void guest_validate_irq(unsigned int intid, uint64_t xcnt = 0, xcnt_diff_us, cval = 0; unsigned long xctl = 0; unsigned int timer_irq = 0; + unsigned int accessor; - if (stage == GUEST_STAGE_VTIMER_CVAL || - stage == GUEST_STAGE_VTIMER_TVAL) { - xctl = timer_get_ctl(VIRTUAL); - timer_set_ctl(VIRTUAL, CTL_IMASK); - xcnt = timer_get_cntct(VIRTUAL); - cval = timer_get_cval(VIRTUAL); + if (intid == IAR_SPURIOUS) + return; + + switch (stage) { + case GUEST_STAGE_VTIMER_CVAL: + case GUEST_STAGE_VTIMER_TVAL: + accessor = VIRTUAL; timer_irq = vtimer_irq; - } else if (stage == GUEST_STAGE_PTIMER_CVAL || - stage == GUEST_STAGE_PTIMER_TVAL) { - xctl = timer_get_ctl(PHYSICAL); - timer_set_ctl(PHYSICAL, CTL_IMASK); - xcnt = timer_get_cntct(PHYSICAL); - cval = timer_get_cval(PHYSICAL); + break; + case GUEST_STAGE_PTIMER_CVAL: + case GUEST_STAGE_PTIMER_TVAL: + accessor = PHYSICAL; timer_irq = ptimer_irq; - } else { + break; + default: GUEST_ASSERT(0); + return; } + xctl = timer_get_ctl(accessor); + if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE)) + return; + + timer_set_ctl(accessor, CTL_IMASK); + xcnt = timer_get_cntct(accessor); + cval = timer_get_cval(accessor); + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); /* Make sure we are dealing with the correct timer IRQ */ @@ -148,6 +158,8 @@ static void guest_validate_irq(unsigned int intid, /* Basic 'timer condition met' check */ GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us); GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl); + + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); } static void guest_irq_handler(struct ex_regs *regs) @@ -158,8 +170,6 @@ static void guest_irq_handler(struct ex_regs *regs) guest_validate_irq(intid, shared_data); - WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); - gic_set_eoi(intid); } -- cgit From 2fe9e0fc21602339b82cdba58ef81a5a97d90ca2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Mar 2023 18:48:00 +0100 Subject: KVM: arm64: selftests: Augment existing timer test to handle variable offset Allow a user to specify the global offset on the command-line. Reviewed-by: Colton Lewis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230330174800.2677007-21-maz@kernel.org --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 176ab41dd01b..8ef370924a02 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -47,6 +47,7 @@ struct test_args { int nr_iter; int timer_period_ms; int migration_freq_ms; + struct kvm_arm_counter_offset offset; }; static struct test_args test_args = { @@ -54,6 +55,7 @@ static struct test_args test_args = { .nr_iter = NR_TEST_ITERS_DEF, .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF, .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS, + .offset = { .reserved = 1 }, }; #define msecs_to_usecs(msec) ((msec) * 1000LL) @@ -382,6 +384,13 @@ static struct kvm_vm *test_vm_create(void) vm_init_descriptor_tables(vm); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + if (!test_args.offset.reserved) { + if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) + vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset); + else + TEST_FAIL("no support for global offset\n"); + } + for (i = 0; i < nr_vcpus; i++) vcpu_init_descriptor_tables(vcpus[i]); @@ -413,6 +422,7 @@ static void test_print_help(char *name) TIMER_TEST_PERIOD_MS_DEF); pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n", TIMER_TEST_MIGRATION_FREQ_MS); + pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n"); pr_info("\t-h: print this help screen\n"); } @@ -420,7 +430,7 @@ static bool parse_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) { + while ((opt = getopt(argc, argv, "hn:i:p:m:o:")) != -1) { switch (opt) { case 'n': test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg); @@ -439,6 +449,10 @@ static bool parse_args(int argc, char *argv[]) case 'm': test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg); break; + case 'o': + test_args.offset.counter_offset = strtol(optarg, NULL, 0); + test_args.offset.reserved = 0; + break; case 'h': default: goto err; -- cgit From a2bed39057b434c4fd816005d1b950fefc61569d Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 8 Mar 2023 11:09:46 +0000 Subject: KVM: selftests: Fixup config fragment for access_tracking_perf_test access_tracking_perf_test requires CONFIG_IDLE_PAGE_TRACKING. However this is missing from the config fragment, so add it in so that this test is no longer skipped. Signed-off-by: Ryan Roberts Reviewed-by: Sean Christopherson Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230308110948.1820163-2-ryan.roberts@arm.com --- tools/testing/selftests/kvm/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index d011b38e259e..8835fed09e9f 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -2,3 +2,4 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y CONFIG_USERFAULTFD=y +CONFIG_IDLE_PAGE_TRACKING=y -- cgit From e659babfc5a693553cf9473470840464f0ed5d77 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 8 Mar 2023 11:09:47 +0000 Subject: KVM: selftests: arm64: Fix pte encode/decode for PA bits > 48 The high bits [51:48] of a physical address should appear at [15:12] in a 64K pte, not at [51:48] as was previously being programmed. Fix this with new helper functions that do the conversion correctly. This also sets us up nicely for adding LPA2 encodings in future. Fixes: 7a6629ef746d ("kvm: selftests: add virt mem support for aarch64") Signed-off-by: Ryan Roberts Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230308110948.1820163-3-ryan.roberts@arm.com --- .../testing/selftests/kvm/lib/aarch64/processor.c | 32 ++++++++++++++++------ 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 5972a23b2765..c413085d58b7 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -58,10 +58,27 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) return (gva >> vm->page_shift) & mask; } -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) { - uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift; - return entry & mask; + uint64_t pte; + + pte = pa & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= attrs; + + return pte; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) +{ + uint64_t pa; + + pa = pte & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + + return pa; } static uint64_t ptrs_per_pgd(struct kvm_vm *vm) @@ -110,18 +127,18 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = vm_alloc_page_table(vm) | 3; + *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = vm_alloc_page_table(vm) | 3; + *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = vm_alloc_page_table(vm) | 3; + *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -130,8 +147,7 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = paddr | 3; - *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */; + *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) -- cgit From e17071754cf50e5f6ff8ebee077e0e4114d3bea5 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 8 Mar 2023 11:09:48 +0000 Subject: KVM: selftests: arm64: Fix ttbr0_el1 encoding for PA bits > 48 Bits [51:48] of the pgd address are stored at bits [5:2] of ttbr0_el1. page_table_test stores its page tables at the far end of IPA space so was tripping over this when run on a system that supports FEAT_LPA (or FEAT_LPA2). Signed-off-by: Ryan Roberts Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230308110948.1820163-4-ryan.roberts@arm.com --- tools/testing/selftests/kvm/lib/aarch64/processor.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index c413085d58b7..233357d2f1cc 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -242,7 +242,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; - uint64_t sctlr_el1, tcr_el1; + uint64_t sctlr_el1, tcr_el1, ttbr0_el1; if (!init) init = &default_init; @@ -293,10 +293,13 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } + ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift); + /* Configure output size */ switch (vm->mode) { case VM_MODE_P52V48_64K: tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: @@ -326,7 +329,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); } -- cgit From 767cc0501bbb51f2daad35d1bc4f6eaa857ed057 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 6 Mar 2023 16:15:07 +0000 Subject: KVM: selftests: Comment newly defined aarch64 ID registers All otherwise unspecified aarch64 ID registers should be read as zero so we cover the whole ID register space in the get-reg-list test but we've added comments for those that have been named. Add comments for ID_AA64PFR2_EL1, ID_AA64SMFR0_EL1, ID_AA64ISAR2_EL1, ID_AA64MMFR3_EL1 and ID_AA64MMFR4_EL1 which have been defined since the comments were added so someone looking for them will see that they are covered. Signed-off-by: Mark Brown Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230210-kvm-arm64-getreg-comments-v1-1-a16c73be5ab4@kernel.org --- tools/testing/selftests/kvm/aarch64/get-reg-list.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index d287dd2cac0a..df8a8afca4fc 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -807,10 +807,10 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 0, 3, 7), ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 4, 2), + ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */ ARM64_SYS_REG(3, 0, 0, 4, 3), ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 4, 5), + ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 4, 6), ARM64_SYS_REG(3, 0, 0, 4, 7), ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ @@ -823,7 +823,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 0, 5, 7), ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ - ARM64_SYS_REG(3, 0, 0, 6, 2), + ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */ ARM64_SYS_REG(3, 0, 0, 6, 3), ARM64_SYS_REG(3, 0, 0, 6, 4), ARM64_SYS_REG(3, 0, 0, 6, 5), @@ -832,8 +832,8 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), - ARM64_SYS_REG(3, 0, 0, 7, 4), + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 5), ARM64_SYS_REG(3, 0, 0, 7, 6), ARM64_SYS_REG(3, 0, 0, 7, 7), -- cgit From e65733b5c59a1ea20324a03494364958bef3fc68 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:38 +0000 Subject: KVM: x86: Redefine 'longmode' as a flag for KVM_EXIT_HYPERCALL The 'longmode' field is a bit annoying as it blows an entire __u32 to represent a boolean value. Since other architectures are looking to add support for KVM_EXIT_HYPERCALL, now is probably a good time to clean it up. Redefine the field (and the remaining padding) as a set of flags. Preserve the existing ABI by using bit 0 to indicate if the guest was in long mode and requiring that the remaining 31 bits must be zero. Cc: Paolo Bonzini Acked-by: Sean Christopherson Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-2-oliver.upton@linux.dev --- Documentation/virt/kvm/api.rst | 3 +-- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/include/uapi/asm/kvm.h | 3 +++ arch/x86/kvm/x86.c | 6 +++++- include/uapi/linux/kvm.h | 9 +++++++-- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 62de0768d6aa..9b01e3d0e757 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6218,8 +6218,7 @@ to the byte array. __u64 nr; __u64 args[6]; __u64 ret; - __u32 longmode; - __u32 pad; + __u64 flags; } hypercall; Unused. This was once used for 'hypercall to userspace'. To implement diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 808c292ad3f4..15bda40517ff 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2204,4 +2204,11 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) +/* + * KVM previously used a u32 field in kvm_run to indicate the hypercall was + * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the + * remaining 31 lower bits must be 0 to preserve ABI. + */ +#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 7f467fe05d42..1a6a1f987949 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -559,4 +559,7 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ +/* x86-specific KVM_EXIT_HYPERCALL flags. */ +#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7713420abab0..27a1d5c1a018 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9803,7 +9803,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) vcpu->run->hypercall.args[0] = gpa; vcpu->run->hypercall.args[1] = npages; vcpu->run->hypercall.args[2] = attrs; - vcpu->run->hypercall.longmode = op_64_bit; + vcpu->run->hypercall.flags = 0; + if (op_64_bit) + vcpu->run->hypercall.flags |= KVM_EXIT_HYPERCALL_LONG_MODE; + + WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ); vcpu->arch.complete_userspace_io = complete_hypercall_exit; return 0; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d77aef872a0a..dd42d7dfb86c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -341,8 +341,13 @@ struct kvm_run { __u64 nr; __u64 args[6]; __u64 ret; - __u32 longmode; - __u32 pad; + + union { +#ifndef __KERNEL__ + __u32 longmode; +#endif + __u64 flags; + }; } hypercall; /* KVM_EXIT_TPR_ACCESS */ struct { -- cgit From de40bb8abb764f6866d82c4e2a43acdb22892cf4 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:39 +0000 Subject: KVM: arm64: Add a helper to check if a VM has ran once The test_bit(...) pattern is quite a lot of keystrokes. Replace existing callsites with a helper. No functional change intended. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-3-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/hypercalls.c | 3 +-- arch/arm64/kvm/pmu-emul.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bcd774d74f34..d091d1c9890b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1061,6 +1061,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); (system_supports_32bit_el0() && \ !static_branch_unlikely(&arm64_mismatched_32bit_el0)) +#define kvm_vm_has_ran_once(kvm) \ + (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags)) + int kvm_trng_call(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM extern phys_addr_t hyp_mem_base; diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5da884e11337..a09a526a7d7c 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -379,8 +379,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) mutex_lock(&kvm->lock); - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) && - val != *fw_reg_bmap) { + if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) { ret = -EBUSY; goto out; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 24908400e190..a0fc569fdbca 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -880,7 +880,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) list_for_each_entry(entry, &arm_pmus, entry) { arm_pmu = entry->arm_pmu; if (arm_pmu->pmu.type == pmu_id) { - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) || + if (kvm_vm_has_ran_once(kvm) || (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) { ret = -EBUSY; break; @@ -963,7 +963,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) mutex_lock(&kvm->lock); - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) { + if (kvm_vm_has_ran_once(kvm)) { mutex_unlock(&kvm->lock); return -EBUSY; } -- cgit From e0fc6b21616dd917899ee4a2d4126b4a963c0871 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:40 +0000 Subject: KVM: arm64: Add vm fd device attribute accessors A subsequent change will allow userspace to convey a filter for hypercalls through a vm device attribute. Add the requisite boilerplate for vm attribute accessors. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-4-oliver.upton@linux.dev --- arch/arm64/kvm/arm.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..b6e26c0e65e5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1439,11 +1439,28 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, } } +static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + default: + return -ENXIO; + } +} + +static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->group) { + default: + return -ENXIO; + } +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; switch (ioctl) { case KVM_CREATE_IRQCHIP: { @@ -1479,6 +1496,18 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); } + case KVM_HAS_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_has_attr(kvm, &attr); + } + case KVM_SET_DEVICE_ATTR: { + if (copy_from_user(&attr, argp, sizeof(attr))) + return -EFAULT; + + return kvm_vm_set_attr(kvm, &attr); + } default: return -EINVAL; } -- cgit From aac94968126beb9846c12a940f1302ece7849b4f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:41 +0000 Subject: KVM: arm64: Rename SMC/HVC call handler to reflect reality KVM handles SMCCC calls from virtual EL2 that use the SMC instruction since commit bd36b1a9eb5a ("KVM: arm64: nv: Handle SMCs taken from virtual EL2"). Thus, the function name of the handler no longer reflects reality. Normalize the name on SMCCC, since that's the only hypercall interface KVM supports in the first place. No fuctional change intended. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-5-oliver.upton@linux.dev --- arch/arm64/kvm/handle_exit.c | 4 ++-- arch/arm64/kvm/hypercalls.c | 2 +- include/kvm/arm_hypercalls.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a798c0b4d717..5e4f9737cbd5 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -52,7 +52,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu) return 1; } - ret = kvm_hvc_call_handler(vcpu); + ret = kvm_smccc_call_handler(vcpu); if (ret < 0) { vcpu_set_reg(vcpu, 0, ~0UL); return 1; @@ -89,7 +89,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than * being treated as UNDEFINED. */ - ret = kvm_hvc_call_handler(vcpu); + ret = kvm_smccc_call_handler(vcpu); if (ret < 0) vcpu_set_reg(vcpu, 0, ~0UL); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index a09a526a7d7c..5ead6c6afff0 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -121,7 +121,7 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) } } -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index 1188f116cf4e..8f4e33bc43e8 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -6,7 +6,7 @@ #include -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu); +int kvm_smccc_call_handler(struct kvm_vcpu *vcpu); static inline u32 smccc_get_function(struct kvm_vcpu *vcpu) { -- cgit From c2d2e9b3d8ce9db825a5630d9d52d542f5138ae0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:42 +0000 Subject: KVM: arm64: Start handling SMCs from EL1 Whelp, the architecture gods have spoken and confirmed that the function ID space is common between SMCs and HVCs. Not only that, the expectation is that hypervisors handle calls to both SMC and HVC conduits. KVM recently picked up support for SMCCCs in commit bd36b1a9eb5a ("KVM: arm64: nv: Handle SMCs taken from virtual EL2") but scoped it only to a nested hypervisor. Let's just open the floodgates and let EL1 access our SMCCC implementation with the SMC instruction as well. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-6-oliver.upton@linux.dev --- arch/arm64/kvm/handle_exit.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5e4f9737cbd5..68f95dcd41a1 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -72,13 +72,15 @@ static int handle_smc(struct kvm_vcpu *vcpu) * * We need to advance the PC after the trap, as it would * otherwise return to the same address... - * - * Only handle SMCs from the virtual EL2 with an immediate of zero and - * skip it otherwise. */ - if (!vcpu_is_el2(vcpu) || kvm_vcpu_hvc_get_imm(vcpu)) { + kvm_incr_pc(vcpu); + + /* + * SMCs with a nonzero immediate are reserved according to DEN0028E 2.9 + * "SMC and HVC immediate value". + */ + if (kvm_vcpu_hvc_get_imm(vcpu)) { vcpu_set_reg(vcpu, 0, ~0UL); - kvm_incr_pc(vcpu); return 1; } @@ -93,8 +95,6 @@ static int handle_smc(struct kvm_vcpu *vcpu) if (ret < 0) vcpu_set_reg(vcpu, 0, ~0UL); - kvm_incr_pc(vcpu); - return ret; } -- cgit From a8308b3fc9494953c453480fb277e24f82f7d2b9 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:43 +0000 Subject: KVM: arm64: Refactor hvc filtering to support different actions KVM presently allows userspace to filter guest hypercalls with bitmaps expressed via pseudo-firmware registers. These bitmaps have a narrow scope and, of course, can only allow/deny a particular call. A subsequent change to KVM will introduce a generalized UAPI for filtering hypercalls, allowing functions to be forwarded to userspace. Refactor the existing hypercall filtering logic to make room for more than two actions. While at it, generalize the function names around SMCCC as it is the basis for the upcoming UAPI. No functional change intended. Reviewed-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-7-oliver.upton@linux.dev --- arch/arm64/include/uapi/asm/kvm.h | 9 +++++++++ arch/arm64/kvm/hypercalls.c | 26 ++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f8129c624b07..f9672ef1159a 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -469,6 +469,15 @@ enum { /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) +enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + +#ifdef __KERNEL__ + NR_SMCCC_FILTER_ACTIONS +#endif +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 5ead6c6afff0..0be974e2f1fc 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -65,7 +65,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) val[3] = lower_32_bits(cycles); } -static bool kvm_hvc_call_default_allowed(u32 func_id) +static bool kvm_smccc_default_allowed(u32 func_id) { switch (func_id) { /* @@ -93,7 +93,7 @@ static bool kvm_hvc_call_default_allowed(u32 func_id) } } -static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) +static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; @@ -117,20 +117,38 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP, &smccc_feat->vendor_hyp_bmap); default: - return kvm_hvc_call_default_allowed(func_id); + return false; } } +static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) +{ + if (kvm_smccc_test_fw_bmap(vcpu, func_id) || + kvm_smccc_default_allowed(func_id)) + return KVM_SMCCC_FILTER_HANDLE; + + return KVM_SMCCC_FILTER_DENY; +} + int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); u64 val[4] = {SMCCC_RET_NOT_SUPPORTED}; u32 feature; + u8 action; gpa_t gpa; - if (!kvm_hvc_call_allowed(vcpu, func_id)) + action = kvm_smccc_get_action(vcpu, func_id); + switch (action) { + case KVM_SMCCC_FILTER_HANDLE: + break; + case KVM_SMCCC_FILTER_DENY: + goto out; + default: + WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action); goto out; + } switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: -- cgit From fb88707dd39bd1d5ec4a058776de9ee99bcc7b72 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:44 +0000 Subject: KVM: arm64: Use a maple tree to represent the SMCCC filter Maple tree is an efficient B-tree implementation that is intended for storing non-overlapping intervals. Such a data structure is a good fit for the SMCCC filter as it is desirable to sparsely allocate the 32 bit function ID space. To that end, add a maple tree to kvm_arch and correctly init/teardown along with the VM. Wire in a test against the hypercall filter for HVCs which does nothing until the controls are exposed to userspace. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-8-oliver.upton@linux.dev --- arch/arm64/include/asm/kvm_host.h | 5 +++- arch/arm64/kvm/arm.c | 2 ++ arch/arm64/kvm/hypercalls.c | 57 +++++++++++++++++++++++++++++++++++++++ include/kvm/arm_hypercalls.h | 1 + 4 files changed, 64 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d091d1c9890b..2682b3fd0881 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -221,7 +222,8 @@ struct kvm_arch { #define KVM_ARCH_FLAG_EL1_32BIT 4 /* PSCI SYSTEM_SUSPEND enabled for the guest */ #define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5 - + /* SMCCC filter initialized for the VM */ +#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 6 unsigned long flags; /* @@ -242,6 +244,7 @@ struct kvm_arch { /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + struct maple_tree smccc_filter; /* * For an untrusted host VM, 'pkvm.handle' is used to lookup diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index b6e26c0e65e5..1202ac03bee0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -192,6 +192,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); + + kvm_arm_teardown_hypercalls(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 0be974e2f1fc..ba7cd84c6668 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -121,8 +121,58 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) } } +#define SMCCC_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID +#define SMCCC_ARCH_RANGE_END \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, ARM_SMCCC_FUNC_MASK) + +static void init_smccc_filter(struct kvm *kvm) +{ + int r; + + mt_init(&kvm->arch.smccc_filter); + + /* + * Prevent userspace from handling any SMCCC calls in the architecture + * range, avoiding the risk of misrepresenting Spectre mitigation status + * to the guest. + */ + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMCCC_ARCH_RANGE_BEGIN, SMCCC_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); +} + +static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) +{ + unsigned long idx = func_id; + void *val; + + if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags)) + return KVM_SMCCC_FILTER_HANDLE; + + /* + * But where's the error handling, you say? + * + * mt_find() returns NULL if no entry was found, which just so happens + * to match KVM_SMCCC_FILTER_HANDLE. + */ + val = mt_find(&kvm->arch.smccc_filter, &idx, idx); + return xa_to_value(val); +} + static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) { + /* + * Intervening actions in the SMCCC filter take precedence over the + * pseudo-firmware register bitmaps. + */ + u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id); + if (action != KVM_SMCCC_FILTER_HANDLE) + return action; + if (kvm_smccc_test_fw_bmap(vcpu, func_id) || kvm_smccc_default_allowed(func_id)) return KVM_SMCCC_FILTER_HANDLE; @@ -263,6 +313,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; + + init_smccc_filter(kvm); +} + +void kvm_arm_teardown_hypercalls(struct kvm *kvm) +{ + mtree_destroy(&kvm->arch.smccc_filter); } int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index 8f4e33bc43e8..fe6c31575b05 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -43,6 +43,7 @@ static inline void smccc_set_retval(struct kvm_vcpu *vcpu, struct kvm_one_reg; void kvm_arm_init_hypercalls(struct kvm *kvm); +void kvm_arm_teardown_hypercalls(struct kvm *kvm); int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -- cgit From d824dff1919bbd523d4d5c860437d043c0ad121d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:45 +0000 Subject: KVM: arm64: Add support for KVM_EXIT_HYPERCALL In anticipation of user hypercall filters, add the necessary plumbing to get SMCCC calls out to userspace. Even though the exit structure has space for KVM to pass register arguments, let's just avoid it altogether and let userspace poke at the registers via KVM_GET_ONE_REG. This deliberately stretches the definition of a 'hypercall' to cover SMCs from EL1 in addition to the HVCs we know and love. KVM doesn't support EL1 calls into secure services, but now we can paint that as a userspace problem and be done with it. Finally, we need a flag to let userspace know what conduit instruction was used (i.e. SMC vs. HVC). Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-9-oliver.upton@linux.dev --- Documentation/virt/kvm/api.rst | 18 ++++++++++++++++-- arch/arm64/include/uapi/asm/kvm.h | 4 ++++ arch/arm64/kvm/handle_exit.c | 4 +++- arch/arm64/kvm/hypercalls.c | 16 ++++++++++++++++ 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9b01e3d0e757..9497792c4ee5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6221,11 +6221,25 @@ to the byte array. __u64 flags; } hypercall; -Unused. This was once used for 'hypercall to userspace'. To implement -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). + +It is strongly recommended that userspace use ``KVM_EXIT_IO`` (x86) or +``KVM_EXIT_MMIO`` (all except s390) to implement functionality that +requires a guest to interact with host userpace. .. note:: KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. +For arm64: +---------- + +``nr`` contains the function ID of the guest's SMCCC call. Userspace is +expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call +parameters from the vCPU's GPRs. + +Definition of ``flags``: + - ``KVM_HYPERCALL_EXIT_SMC``: Indicates that the guest used the SMC + conduit to initiate the SMCCC call. If this bit is 0 then the guest + used the HVC conduit for the SMCCC call. + :: /* KVM_EXIT_TPR_ACCESS */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f9672ef1159a..f86446c5a7e3 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -472,12 +472,16 @@ enum { enum kvm_smccc_filter_action { KVM_SMCCC_FILTER_HANDLE = 0, KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, #ifdef __KERNEL__ NR_SMCCC_FILTER_ACTIONS #endif }; +/* arm64-specific KVM_EXIT_HYPERCALL flags */ +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 68f95dcd41a1..3f43e20c48b6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -71,7 +71,9 @@ static int handle_smc(struct kvm_vcpu *vcpu) * Trap exception, not a Secure Monitor Call exception [...]" * * We need to advance the PC after the trap, as it would - * otherwise return to the same address... + * otherwise return to the same address. Furthermore, pre-incrementing + * the PC before potentially exiting to userspace maintains the same + * abstraction for both SMCs and HVCs. */ kvm_incr_pc(vcpu); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index ba7cd84c6668..2db53709bec1 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -180,6 +180,19 @@ static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) return KVM_SMCCC_FILTER_DENY; } +static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id) +{ + u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); + struct kvm_run *run = vcpu->run; + + run->exit_reason = KVM_EXIT_HYPERCALL; + run->hypercall.nr = func_id; + run->hypercall.flags = 0; + + if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64) + run->hypercall.flags |= KVM_HYPERCALL_EXIT_SMC; +} + int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; @@ -195,6 +208,9 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) break; case KVM_SMCCC_FILTER_DENY: goto out; + case KVM_SMCCC_FILTER_FWD_TO_USER: + kvm_prepare_hypercall_exit(vcpu, func_id); + return 0; default: WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action); goto out; -- cgit From 821d935c87bc95253f82deec3cbb457ccf3de003 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:46 +0000 Subject: KVM: arm64: Introduce support for userspace SMCCC filtering As the SMCCC (and related specifications) march towards an 'everything and the kitchen sink' interface for interacting with a system it becomes less likely that KVM will support every related feature. We could do better by letting userspace have a crack at it instead. Allow userspace to define an 'SMCCC filter' that applies to both HVCs and SMCs initiated by the guest. Supporting both conduits with this interface is important for a couple of reasons. Guest SMC usage is table stakes for a nested guest, as HVCs are always taken to the virtual EL2. Additionally, guests may want to interact with a service on the secure side which can now be proxied by userspace. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-10-oliver.upton@linux.dev --- Documentation/virt/kvm/api.rst | 4 ++ Documentation/virt/kvm/devices/vm.rst | 79 +++++++++++++++++++++++++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 11 +++++ arch/arm64/kvm/arm.c | 4 ++ arch/arm64/kvm/hypercalls.c | 60 ++++++++++++++++++++++++++ include/kvm/arm_hypercalls.h | 3 ++ 6 files changed, 161 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 9497792c4ee5..c8ab2f730945 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6231,6 +6231,10 @@ requires a guest to interact with host userpace. For arm64: ---------- +SMCCC exits can be enabled depending on the configuration of the SMCCC +filter. See the Documentation/virt/kvm/devices/vm.rst +``KVM_ARM_SMCCC_FILTER`` for more details. + ``nr`` contains the function ID of the guest's SMCCC call. Userspace is expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call parameters from the vCPU's GPRs. diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 147efec626e5..9d726e60ec47 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -321,3 +321,82 @@ Allows userspace to query the status of migration mode. if it is enabled :Returns: -EFAULT if the given address is not accessible from kernel space; 0 in case of success. + +6. GROUP: KVM_ARM_VM_SMCCC_CTRL +=============================== + +:Architectures: arm64 + +6.1. ATTRIBUTE: KVM_ARM_VM_SMCCC_FILTER (w/o) +--------------------------------------------- + +:Parameters: Pointer to a ``struct kvm_smccc_filter`` + +:Returns: + + ====== =========================================== + EEXIST Range intersects with a previously inserted + or reserved range + EBUSY A vCPU in the VM has already run + EINVAL Invalid filter configuration + ENOMEM Failed to allocate memory for the in-kernel + representation of the SMCCC filter + ====== =========================================== + +Requests the installation of an SMCCC call filter described as follows:: + + enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + }; + + struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; + }; + +The filter is defined as a set of non-overlapping ranges. Each +range defines an action to be applied to SMCCC calls within the range. +Userspace can insert multiple ranges into the filter by using +successive calls to this attribute. + +The default configuration of KVM is such that all implemented SMCCC +calls are allowed. Thus, the SMCCC filter can be defined sparsely +by userspace, only describing ranges that modify the default behavior. + +The range expressed by ``struct kvm_smccc_filter`` is +[``base``, ``base + nr_functions``). The range is not allowed to wrap, +i.e. userspace cannot rely on ``base + nr_functions`` overflowing. + +The SMCCC filter applies to both SMC and HVC calls initiated by the +guest. The SMCCC filter gates the in-kernel emulation of SMCCC calls +and as such takes effect before other interfaces that interact with +SMCCC calls (e.g. hypercall bitmap registers). + +Actions: + + - ``KVM_SMCCC_FILTER_HANDLE``: Allows the guest SMCCC call to be + handled in-kernel. It is strongly recommended that userspace *not* + explicitly describe the allowed SMCCC call ranges. + + - ``KVM_SMCCC_FILTER_DENY``: Rejects the guest SMCCC call in-kernel + and returns to the guest. + + - ``KVM_SMCCC_FILTER_FWD_TO_USER``: The guest SMCCC call is forwarded + to userspace with an exit reason of ``KVM_EXIT_HYPERCALL``. + +The ``pad`` field is reserved for future use and must be zero. KVM may +return ``-EINVAL`` if the field is nonzero. + +KVM reserves the 'Arm Architecture Calls' range of function IDs and +will reject attempts to define a filter for any portion of these ranges: + + =========== =============== + Start End (inclusive) + =========== =============== + 0x8000_0000 0x8000_FFFF + 0xC000_0000 0xC000_FFFF + =========== =============== diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f86446c5a7e3..3dcfa4bfdf83 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -372,6 +372,10 @@ enum { #endif }; +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -479,6 +483,13 @@ enum kvm_smccc_filter_action { #endif }; +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + /* arm64-specific KVM_EXIT_HYPERCALL flags */ #define KVM_HYPERCALL_EXIT_SMC (1U << 0) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1202ac03bee0..efee032c9560 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1444,6 +1444,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) { switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_has_attr(kvm, attr); default: return -ENXIO; } @@ -1452,6 +1454,8 @@ static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) { switch (attr->group) { + case KVM_ARM_VM_SMCCC_CTRL: + return kvm_vm_smccc_set_attr(kvm, attr); default: return -ENXIO; } diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 2db53709bec1..9a35d6d18193 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -145,6 +145,44 @@ static void init_smccc_filter(struct kvm *kvm) WARN_ON_ONCE(r); } +static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) +{ + const void *zero_page = page_to_virt(ZERO_PAGE(0)); + struct kvm_smccc_filter filter; + u32 start, end; + int r; + + if (copy_from_user(&filter, uaddr, sizeof(filter))) + return -EFAULT; + + if (memcmp(filter.pad, zero_page, sizeof(filter.pad))) + return -EINVAL; + + start = filter.base; + end = start + filter.nr_functions - 1; + + if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS) + return -EINVAL; + + mutex_lock(&kvm->lock); + + if (kvm_vm_has_ran_once(kvm)) { + r = -EBUSY; + goto out_unlock; + } + + r = mtree_insert_range(&kvm->arch.smccc_filter, start, end, + xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT); + if (r) + goto out_unlock; + + set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags); + +out_unlock: + mutex_unlock(&kvm->lock); + return r; +} + static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) { unsigned long idx = func_id; @@ -569,3 +607,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; } + +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return 0; + default: + return -ENXIO; + } +} + +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user *)attr->addr; + + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return kvm_smccc_set_filter(kvm, uaddr); + default: + return -ENXIO; + } +} diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h index fe6c31575b05..2df152207ccd 100644 --- a/include/kvm/arm_hypercalls.h +++ b/include/kvm/arm_hypercalls.h @@ -49,4 +49,7 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr); +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr); + #endif -- cgit From 7e484d2785e2a2e526a6b2679d3e4c1402ffe0ec Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:47 +0000 Subject: KVM: arm64: Return NOT_SUPPORTED to guest for unknown PSCI version A subsequent change to KVM will allow negative returns from SMCCC handlers to exit to userspace. Make way for this change by explicitly returning SMCCC_RET_NOT_SUPPORTED to the guest if the VM is configured to use an unknown PSCI version. Add a WARN since this is undoubtedly a KVM bug. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-11-oliver.upton@linux.dev --- arch/arm64/kvm/psci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 7fbc4c1b9df0..aff54b106c30 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -435,6 +435,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) int kvm_psci_call(struct kvm_vcpu *vcpu) { u32 psci_fn = smccc_get_function(vcpu); + int version = kvm_psci_version(vcpu); unsigned long val; val = kvm_psci_check_allowed_function(vcpu, psci_fn); @@ -443,7 +444,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) return 1; } - switch (kvm_psci_version(vcpu)) { + switch (version) { case KVM_ARM_PSCI_1_1: return kvm_psci_1_x_call(vcpu, 1); case KVM_ARM_PSCI_1_0: @@ -453,6 +454,8 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) case KVM_ARM_PSCI_0_1: return kvm_psci_0_1_call(vcpu); default: - return -EINVAL; + WARN_ONCE(1, "Unknown PSCI version %d", version); + smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0); + return 1; } } -- cgit From 37c8e494794786aa8e4acba1f0f5b45f37b11699 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:48 +0000 Subject: KVM: arm64: Let errors from SMCCC emulation to reach userspace Typically a negative return from an exit handler is used to request a return to userspace with the specified error. KVM's handling of SMCCC emulation (i.e. both HVCs and SMCs) deviates from the trend and resumes the guest instead. Stop handling negative returns this way and instead let the error percolate to userspace. Suggested-by: Suzuki K Poulose Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-12-oliver.upton@linux.dev --- arch/arm64/kvm/handle_exit.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 3f43e20c48b6..6dcd6604b6bc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -36,8 +36,6 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u64 esr) static int handle_hvc(struct kvm_vcpu *vcpu) { - int ret; - trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); vcpu->stat.hvc_exit_stat++; @@ -52,19 +50,11 @@ static int handle_hvc(struct kvm_vcpu *vcpu) return 1; } - ret = kvm_smccc_call_handler(vcpu); - if (ret < 0) { - vcpu_set_reg(vcpu, 0, ~0UL); - return 1; - } - - return ret; + return kvm_smccc_call_handler(vcpu); } static int handle_smc(struct kvm_vcpu *vcpu) { - int ret; - /* * "If an SMC instruction executed at Non-secure EL1 is * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a @@ -93,11 +83,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * at Non-secure EL1 is trapped to EL2 if HCR_EL2.TSC==1, rather than * being treated as UNDEFINED. */ - ret = kvm_smccc_call_handler(vcpu); - if (ret < 0) - vcpu_set_reg(vcpu, 0, ~0UL); - - return ret; + return kvm_smccc_call_handler(vcpu); } /* -- cgit From fab19915f498b0e76fabd4d78841c99b7b6d7851 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:49 +0000 Subject: KVM: selftests: Add a helper for SMCCC calls with SMC instruction Build a helper for doing SMCs in selftests by macro-izing the current HVC implementation and taking the conduit instruction as an argument. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-13-oliver.upton@linux.dev --- .../selftests/kvm/include/aarch64/processor.h | 13 ++++++ .../testing/selftests/kvm/lib/aarch64/processor.c | 52 ++++++++++++++-------- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 5f977528e09c..cb537253a6b9 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -214,6 +214,19 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res); +/** + * smccc_smc - Invoke a SMCCC function using the smc conduit + * @function_id: the SMCCC function to be called + * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7 + * @res: pointer to write the return values from registers x0-x3 + * + */ +void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res); + + + uint32_t guest_get_vcpuid(void); #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 5972a23b2765..24e8122307f4 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -508,29 +508,43 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, close(kvm_fd); } +#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \ + arg6, res) \ + asm volatile("mov w0, %w[function_id]\n" \ + "mov x1, %[arg0]\n" \ + "mov x2, %[arg1]\n" \ + "mov x3, %[arg2]\n" \ + "mov x4, %[arg3]\n" \ + "mov x5, %[arg4]\n" \ + "mov x6, %[arg5]\n" \ + "mov x7, %[arg6]\n" \ + #insn "#0\n" \ + "mov %[res0], x0\n" \ + "mov %[res1], x1\n" \ + "mov %[res2], x2\n" \ + "mov %[res3], x3\n" \ + : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \ + [res2] "=r"(res->a2), [res3] "=r"(res->a3) \ + : [function_id] "r"(function_id), [arg0] "r"(arg0), \ + [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \ + [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \ + : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7") + + void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res) { - asm volatile("mov w0, %w[function_id]\n" - "mov x1, %[arg0]\n" - "mov x2, %[arg1]\n" - "mov x3, %[arg2]\n" - "mov x4, %[arg3]\n" - "mov x5, %[arg4]\n" - "mov x6, %[arg5]\n" - "mov x7, %[arg6]\n" - "hvc #0\n" - "mov %[res0], x0\n" - "mov %[res1], x1\n" - "mov %[res2], x2\n" - "mov %[res3], x3\n" - : [res0] "=r"(res->a0), [res1] "=r"(res->a1), - [res2] "=r"(res->a2), [res3] "=r"(res->a3) - : [function_id] "r"(function_id), [arg0] "r"(arg0), - [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), - [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) - : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7"); + __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, + arg6, res); +} + +void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, + uint64_t arg6, struct arm_smccc_res *res) +{ + __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, + arg6, res); } void kvm_selftest_arch_init(void) -- cgit From 60e7dade498eb881bcdf0d9a420c97625f73acc1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Apr 2023 15:40:50 +0000 Subject: KVM: selftests: Add test for SMCCC filter Add a selftest for the SMCCC filter, ensuring basic UAPI constraints (e.g. reserved ranges, non-overlapping ranges) are upheld. Additionally, test that the DENIED and FWD_TO_USER work as intended. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230404154050.2270077-14-oliver.upton@linux.dev --- tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/aarch64/smccc_filter.c | 260 +++++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 tools/testing/selftests/kvm/aarch64/smccc_filter.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 84a627c43795..d66a0642cffd 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -141,6 +141,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test +TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c new file mode 100644 index 000000000000..0f9db0641847 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * smccc_filter - Tests for the SMCCC filter UAPI. + * + * Copyright (c) 2023 Google LLC + * + * This test includes: + * - Tests that the UAPI constraints are upheld by KVM. For example, userspace + * is prevented from filtering the architecture range of SMCCC calls. + * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended. + */ + +#include +#include +#include + +#include "processor.h" +#include "test_util.h" + +enum smccc_conduit { + HVC_INSN, + SMC_INSN, +}; + +#define for_each_conduit(conduit) \ + for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) + +static void guest_main(uint32_t func_id, enum smccc_conduit conduit) +{ + struct arm_smccc_res res; + + if (conduit == SMC_INSN) + smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res); + else + smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res); + + GUEST_SYNC(res.a0); +} + +static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, + enum kvm_smccc_filter_action action) +{ + struct kvm_smccc_filter filter = { + .base = start, + .nr_functions = nr_functions, + .action = action, + }; + + return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL, + KVM_ARM_VM_SMCCC_FILTER, &filter); +} + +static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, + enum kvm_smccc_filter_action action) +{ + int ret = __set_smccc_filter(vm, start, nr_functions, action); + + TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret); +} + +static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) +{ + struct kvm_vcpu_init init; + struct kvm_vm *vm; + + vm = vm_create(1); + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + + /* + * Enable in-kernel emulation of PSCI to ensure that calls are denied + * due to the SMCCC filter, not because of KVM. + */ + init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); + + *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); + return vm; +} + +static void test_pad_must_be_zero(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + struct kvm_smccc_filter filter = { + .base = PSCI_0_2_FN_PSCI_VERSION, + .nr_functions = 1, + .action = KVM_SMCCC_FILTER_DENY, + .pad = { -1 }, + }; + int r; + + r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL, + KVM_ARM_VM_SMCCC_FILTER, &filter); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Setting filter with nonzero padding should return EINVAL"); +} + +/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */ +static void test_filter_reserved_range(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1, + 1, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EEXIST, + "Attempt to filter reserved range should return EEXIST"); + + kvm_vm_free(vm); +} + +static void test_invalid_nr_functions(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Attempt to filter 0 functions should return EINVAL"); + + kvm_vm_free(vm); +} + +static void test_overflow_nr_functions(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Attempt to overflow filter range should return EINVAL"); + + kvm_vm_free(vm); +} + +static void test_reserved_action(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Attempt to use reserved filter action should return EINVAL"); + + kvm_vm_free(vm); +} + + +/* Test that overlapping configurations of the SMCCC filter are rejected */ +static void test_filter_overlap(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = setup_vm(&vcpu); + int r; + + set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY); + + r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EEXIST, + "Attempt to filter already configured range should return EEXIST"); + + kvm_vm_free(vm); +} + +static void expect_call_denied(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_SYNC) + TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + + TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED, + "Unexpected SMCCC return code: %lu", uc.args[1]); +} + +/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */ +static void test_filter_denied(void) +{ + enum smccc_conduit conduit; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + for_each_conduit(conduit) { + vm = setup_vm(&vcpu); + + set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY); + vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit); + + vcpu_run(vcpu); + expect_call_denied(vcpu); + + kvm_vm_free(vm); + } +} + +static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id, + enum smccc_conduit conduit) +{ + struct kvm_run *run = vcpu->run; + + TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL, + "Unexpected exit reason: %u", run->exit_reason); + TEST_ASSERT(run->hypercall.nr == func_id, + "Unexpected SMCCC function: %llu", run->hypercall.nr); + + if (conduit == SMC_INSN) + TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC, + "KVM_HYPERCALL_EXIT_SMC is not set"); + else + TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC), + "KVM_HYPERCAL_EXIT_SMC is set"); +} + +/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */ +static void test_filter_fwd_to_user(void) +{ + enum smccc_conduit conduit; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + for_each_conduit(conduit) { + vm = setup_vm(&vcpu); + + set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER); + vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit); + + vcpu_run(vcpu); + expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit); + + kvm_vm_free(vm); + } +} + +static bool kvm_supports_smccc_filter(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER); + + kvm_vm_free(vm); + return !r; +} + +int main(void) +{ + TEST_REQUIRE(kvm_supports_smccc_filter()); + + test_pad_must_be_zero(); + test_invalid_nr_functions(); + test_overflow_nr_functions(); + test_reserved_action(); + test_filter_reserved_range(); + test_filter_overlap(); + test_filter_denied(); + test_filter_fwd_to_user(); +} -- cgit From 0e5c9a9d6548e9b178d4696c696ae4a21c39ae58 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 5 Apr 2023 12:48:58 +0100 Subject: KVM: arm64: Expose SMC/HVC width to userspace When returning to userspace to handle a SMCCC call, we consistently set PC to point to the instruction immediately after the HVC/SMC. However, should userspace need to know the exact address of the trapping instruction, it needs to know about the *size* of that instruction. For AArch64, this is pretty easy. For AArch32, this is a bit more funky, as Thumb has 16bit encodings for both HVC and SMC. Expose this to userspace with a new flag that directly derives from ESR_EL2.IL. Also update the documentation to reflect the PC state at the point of exit. Finally, this fixes a small buglet where the hypercall.{args,ret} fields would not be cleared on exit, and could contain some random junk. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/86pm8iv8tj.wl-maz@kernel.org --- Documentation/virt/kvm/api.rst | 8 ++++++++ arch/arm64/include/uapi/asm/kvm.h | 3 ++- arch/arm64/kvm/hypercalls.c | 16 +++++++++++----- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index c8ab2f730945..103f945959ed 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6244,6 +6244,14 @@ Definition of ``flags``: conduit to initiate the SMCCC call. If this bit is 0 then the guest used the HVC conduit for the SMCCC call. + - ``KVM_HYPERCALL_EXIT_16BIT``: Indicates that the guest used a 16bit + instruction to initiate the SMCCC call. If this bit is 0 then the + guest used a 32bit instruction. An AArch64 guest always has this + bit set to 0. + +At the point of exit, PC points to the instruction immediately following +the trapping instruction. + :: /* KVM_EXIT_TPR_ACCESS */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3dcfa4bfdf83..b1c1edf85480 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -491,7 +491,8 @@ struct kvm_smccc_filter { }; /* arm64-specific KVM_EXIT_HYPERCALL flags */ -#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_16BIT (1U << 1) #endif diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 9a35d6d18193..3b6523f25afc 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -222,13 +222,19 @@ static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id) { u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); struct kvm_run *run = vcpu->run; - - run->exit_reason = KVM_EXIT_HYPERCALL; - run->hypercall.nr = func_id; - run->hypercall.flags = 0; + u64 flags = 0; if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64) - run->hypercall.flags |= KVM_HYPERCALL_EXIT_SMC; + flags |= KVM_HYPERCALL_EXIT_SMC; + + if (!kvm_vcpu_trap_il_is32bit(vcpu)) + flags |= KVM_HYPERCALL_EXIT_16BIT; + + run->exit_reason = KVM_EXIT_HYPERCALL; + run->hypercall = (typeof(run->hypercall)) { + .nr = func_id, + .flags = flags, + }; } int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) -- cgit From 5a23ad6510c82049f5ab3795841c30e8f3ca324d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Sat, 8 Apr 2023 12:17:31 +0000 Subject: KVM: arm64: Prevent userspace from handling SMC64 arch range Though presently unused, there is an SMC64 view of the Arm architecture calls defined by the SMCCC. The documentation of the SMCCC filter states that the SMC64 range is reserved, but nothing actually prevents userspace from applying a filter to the range. Insert a range with the HANDLE action for the SMC64 arch range, thereby preventing userspace from imposing filtering/forwarding on it. Fixes: fb88707dd39b ("KVM: arm64: Use a maple tree to represent the SMCCC filter") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230408121732.3411329-2-oliver.upton@linux.dev --- arch/arm64/kvm/hypercalls.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 3b6523f25afc..47254a361295 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -121,11 +121,17 @@ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) } } -#define SMCCC_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID -#define SMCCC_ARCH_RANGE_END \ - ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ - ARM_SMCCC_SMC_32, \ - 0, ARM_SMCCC_FUNC_MASK) +#define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID +#define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, ARM_SMCCC_FUNC_MASK) + +#define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, 0) +#define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, ARM_SMCCC_FUNC_MASK) static void init_smccc_filter(struct kvm *kvm) { @@ -139,10 +145,17 @@ static void init_smccc_filter(struct kvm *kvm) * to the guest. */ r = mtree_insert_range(&kvm->arch.smccc_filter, - SMCCC_ARCH_RANGE_BEGIN, SMCCC_ARCH_RANGE_END, + SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END, xa_mk_value(KVM_SMCCC_FILTER_HANDLE), GFP_KERNEL_ACCOUNT); WARN_ON_ONCE(r); + + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); + } static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) -- cgit From 00e0c947118f456b622c1f2ca316c116dfb4e12c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Sat, 8 Apr 2023 12:17:32 +0000 Subject: KVM: arm64: Test that SMC64 arch calls are reserved Assert that the SMC64 view of the Arm architecture range is reserved by KVM and cannot be filtered by userspace. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230408121732.3411329-3-oliver.upton@linux.dev --- tools/testing/selftests/kvm/aarch64/smccc_filter.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c index 0f9db0641847..dab671fdf239 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c @@ -99,6 +99,7 @@ static void test_filter_reserved_range(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm = setup_vm(&vcpu); + uint32_t smc64_fn; int r; r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1, @@ -106,6 +107,13 @@ static void test_filter_reserved_range(void) TEST_ASSERT(r < 0 && errno == EEXIST, "Attempt to filter reserved range should return EEXIST"); + smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, + 0, 0); + + r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY); + TEST_ASSERT(r < 0 && errno == EEXIST, + "Attempt to filter reserved range should return EEXIST"); + kvm_vm_free(vm); } -- cgit From c5284f6d8ce2b9cf96643da441862434233a4ea3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Apr 2023 09:02:26 +0100 Subject: KVM: selftests: Fix spelling mistake "KVM_HYPERCAL_EXIT_SMC" -> "KVM_HYPERCALL_EXIT_SMC" There is a spelling mistake in a test assert message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230406080226.122955-1-colin.i.king@gmail.com --- tools/testing/selftests/kvm/aarch64/smccc_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c index dab671fdf239..f4ceae9c8925 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c @@ -219,7 +219,7 @@ static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id, "KVM_HYPERCALL_EXIT_SMC is not set"); else TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC), - "KVM_HYPERCAL_EXIT_SMC is set"); + "KVM_HYPERCALL_EXIT_SMC is set"); } /* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */ -- cgit From 49e5d16b6fc003407a33a9961b4bcbb970bd1c76 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 12 Apr 2023 06:27:33 +0000 Subject: KVM: arm64: vgic: Don't acquire its_lock before config_lock commit f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") was meant to rectify a longstanding lock ordering issue in KVM where the kvm->lock is taken while holding vcpu->mutex. As it so happens, the aforementioned commit introduced yet another locking issue by acquiring the its_lock before acquiring the config lock. This is obviously wrong, especially considering that the lock ordering is well documented in vgic.c. Reshuffle the locks once more to take the config_lock before the its_lock. While at it, sprinkle in the lockdep hinting that has become popular as of late to keep lockdep apprised of our ordering. Cc: stable@vger.kernel.org Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230412062733.988229-1-oliver.upton@linux.dev --- arch/arm64/kvm/vgic/vgic-its.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 7713cd06104e..750e51e3779a 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1958,6 +1958,16 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) mutex_init(&its->its_lock); mutex_init(&its->cmd_lock); + /* Yep, even more trickery for lock ordering... */ +#ifdef CONFIG_LOCKDEP + mutex_lock(&dev->kvm->arch.config_lock); + mutex_lock(&its->cmd_lock); + mutex_lock(&its->its_lock); + mutex_unlock(&its->its_lock); + mutex_unlock(&its->cmd_lock); + mutex_unlock(&dev->kvm->arch.config_lock); +#endif + its->vgic_its_base = VGIC_ADDR_UNDEF; INIT_LIST_HEAD(&its->device_list); @@ -2752,15 +2762,14 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return 0; mutex_lock(&kvm->lock); - mutex_lock(&its->its_lock); if (!lock_all_vcpus(kvm)) { - mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); return -EBUSY; } mutex_lock(&kvm->arch.config_lock); + mutex_lock(&its->its_lock); switch (attr) { case KVM_DEV_ARM_ITS_CTRL_RESET: @@ -2774,9 +2783,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) break; } + mutex_unlock(&its->its_lock); mutex_unlock(&kvm->arch.config_lock); unlock_all_vcpus(kvm); - mutex_unlock(&its->its_lock); mutex_unlock(&kvm->lock); return ret; } -- cgit From 55b5bac15939dec3cbcbee1f6271bc3a4afd4534 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:23 +0100 Subject: KVM: arm64: nvhe: Synchronise with page table walker on vcpu run When taking an exception between the EL1&0 translation regime and the EL2 translation regime, the page table walker is allowed to complete the walks started from EL0 or EL1 while running at EL2. It means that altering the system registers that define the EL1&0 translation regime is fraught with danger *unless* we wait for the completion of such walk with a DSB (R_LFHQG and subsequent statements in the ARM ARM). We already did the right thing for other external agents (SPE, TRBE), but not the PTW. Rework the existing SPE/TRBE synchronisation to include the PTW, and add the missing DSB on guest exit. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 2 -- arch/arm64/kvm/hyp/nvhe/switch.c | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 2673bde62fad..d756b939f296 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -37,7 +37,6 @@ static void __debug_save_spe(u64 *pmscr_el1) /* Now drain all buffered data to memory */ psb_csync(); - dsb(nsh); } static void __debug_restore_spe(u64 pmscr_el1) @@ -69,7 +68,6 @@ static void __debug_save_trace(u64 *trfcr_el1) isb(); /* Drain the trace buffer to memory */ tsb_csync(); - dsb(nsh); } static void __debug_restore_trace(u64 trfcr_el1) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c2cb46ca4fb6..71fa16a0dc77 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -272,6 +272,17 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ __debug_save_host_buffers_nvhe(vcpu); + /* + * We're about to restore some new MMU state. Make sure + * ongoing page-table walks that have started before we + * trapped to EL2 have completed. This also synchronises the + * above disabling of SPE and TRBE. + * + * See DDI0487I.a D8.1.5 "Out-of-context translation regimes", + * rule R_LFHQG and subsequent information statements. + */ + dsb(nsh); + __kvm_adjust_pc(vcpu); /* @@ -306,6 +317,13 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __timer_disable_traps(vcpu); __hyp_vgic_save_state(vcpu); + /* + * Same thing as before the guest run: we're about to switch + * the MMU context, so let's make sure we don't have any + * ongoing EL1&0 translations. + */ + dsb(nsh); + __deactivate_traps(vcpu); __load_host_stage2(); -- cgit From a6610435ac17de1ac727c90ad62c723d86c7ea36 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 13 Apr 2023 14:23:42 +0100 Subject: KVM: arm64: Handle 32bit CNTPCTSS traps When CNTPOFF isn't implemented and that we have a non-zero counter offset, CNTPCT and CNTPCTSS are trapped. We properly handle the former, but not the latter, as it is not present in the sysreg table (despite being actually handled in the code). Bummer. Just populate the cp15_64 table with the missing register. Reported-by: Reiji Watanabe Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kvm/sys_regs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index f8da9e1b0c11..a43f21559c3e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -403,6 +403,7 @@ #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) #define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) +#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0) #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index be7c2598e563..feca77083a5c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2538,6 +2538,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, + { SYS_DESC(SYS_AARCH32_CNTPCTSS), access_arch_timer }, }; static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, -- cgit From 7e1b2329c205d0a08ebaf3f619318a8a18f36644 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:24 +0100 Subject: KVM: arm64: nvhe: Synchronise with page table walker on TLBI A TLBI from EL2 impacting EL1 involves messing with the EL1&0 translation regime, and the page table walker may still be performing speculative walks. Piggyback on the existing DSBs to always have a DSB ISH that will synchronise all load/store operations that the PTW may still have. Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/tlb.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index d296d617f589..978179133f4b 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -15,8 +15,31 @@ struct tlb_inv_context { }; static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt) + struct tlb_inv_context *cxt, + bool nsh) { + /* + * We have two requirements: + * + * - ensure that the page table updates are visible to all + * CPUs, for which a dsb(DOMAIN-st) is what we need, DOMAIN + * being either ish or nsh, depending on the invalidation + * type. + * + * - complete any speculative page table walk started before + * we trapped to EL2 so that we can mess with the MM + * registers out of context, for which dsb(nsh) is enough + * + * The composition of these two barriers is a dsb(DOMAIN), and + * the 'nsh' parameter tracks the distinction between + * Inner-Shareable and Non-Shareable, as specified by the + * callers. + */ + if (nsh) + dsb(nsh); + else + dsb(ish); + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; @@ -60,10 +83,8 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, { struct tlb_inv_context cxt; - dsb(ishst); - /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); /* * We could do so much better if we had the VA as well. @@ -113,10 +134,8 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; - dsb(ishst); - /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); __tlbi(vmalls12e1is); dsb(ish); @@ -130,7 +149,7 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + __tlb_switch_to_guest(mmu, &cxt, false); __tlbi(vmalle1); asm volatile("ic iallu"); @@ -142,7 +161,8 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) void __kvm_flush_vm_context(void) { - dsb(ishst); + /* Same remark as in __tlb_switch_to_guest() */ + dsb(ish); __tlbi(alle1is); /* -- cgit From 8442d65373c6316876208c1ad27729e9682fa3cf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:25 +0100 Subject: KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc() We rely on the presence of a DSB at the end of kvm_flush_dcache_to_poc() that, on top of ensuring completion of the cache clean, also covers the speculative page table walk started from EL1. Document this dependency. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 552653fa18be..2e9ec4a2a4a3 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -297,6 +297,13 @@ int __pkvm_prot_finalize(void) params->vttbr = kvm_get_vttbr(mmu); params->vtcr = host_mmu.arch.vtcr; params->hcr_el2 |= HCR_VM; + + /* + * The CMO below not only cleans the updated params to the + * PoC, but also provides the DSB that ensures ongoing + * page-table walks that have started before we trapped to EL2 + * have completed. + */ kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); -- cgit From 1ff2755d6800d60bee96fb303cbbf30f9bb483a2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:26 +0100 Subject: KVM: arm64: vhe: Synchronise with page table walker on MMU update Contrary to nVHE, VHE is a lot easier when it comes to dealing with speculative page table walks started at EL1. As we only change EL1&0 translation regime when context-switching, we already benefit from the effect of the DSB that sits in the context switch code. We only need to take care of it in the NV case, where we can flip between between two EL1 contexts (one of them being the virtual EL2) without a context switch. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 7b44f6b3b547..b35a178e7e0d 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and @@ -69,6 +70,17 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; __sysreg_save_user_state(host_ctxt); + /* + * When running a normal EL1 guest, we only load a new vcpu + * after a context switch, which imvolves a DSB, so all + * speculative EL1&0 walks will have already completed. + * If running NV, the vcpu may transition between vEL1 and + * vEL2 without a context switch, so make sure we complete + * those walks before loading a new context. + */ + if (vcpu_has_nv(vcpu)) + dsb(nsh); + /* * Load guest EL1 and user state * -- cgit From bcf3e7da3ad3bfea38ac6ba9f56b99b2877af51f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 8 Apr 2023 17:04:27 +0100 Subject: KVM: arm64: vhe: Drop extra isb() on guest exit __kvm_vcpu_run_vhe() end on VHE with an isb(). However, this function is only reachable via kvm_call_hyp_ret(), which already contains an isb() in order to mimick the behaviour of nVHE and provide a context synchronisation event. We thus have two isb()s back to back, which is one too many. Drop the first one and solely rely on the one in the helper. Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index cd3f3117bf16..3d868e84c7a0 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -227,11 +227,10 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. + * parameters, such as traps. We rely on the isb() in kvm_call_hyp*() + * to make sure these changes take effect before running the host or + * additional guests. */ - isb(); - return ret; } -- cgit From 4ff910be01c0ca28c2ea8b354dd47a3a17524489 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Tue, 18 Apr 2023 19:18:51 -0700 Subject: KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init() kvm_arch_vcpu_ioctl_vcpu_init() doesn't acquire mp_state_lock when setting the mp_state to KVM_MP_STATE_RUNNABLE. Fix the code to acquire the lock. Signed-off-by: Reiji Watanabe [maz: minor refactor] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230419021852.2981107-2-reijiw@google.com --- arch/arm64/kvm/arm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fd8d355aca15..ad3655a7d122 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1241,11 +1241,15 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, /* * Handle the "start in power-off" case. */ + spin_lock(&vcpu->arch.mp_state_lock); + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - kvm_arm_vcpu_power_off(vcpu); + __kvm_arm_vcpu_power_off(vcpu); else WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); + spin_unlock(&vcpu->arch.mp_state_lock); + return 0; } -- cgit From a189884bdc9238aeba941c50f02e25eb584fafed Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Tue, 18 Apr 2023 19:18:52 -0700 Subject: KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state All accessors of kvm_vcpu_arch::mp_state should be {READ,WRITE}_ONCE(), since readers of the mp_state don't acquire the mp_state_lock. Nonetheless, kvm_psci_vcpu_on() updates the mp_state without using WRITE_ONCE(). So, fix the code to update the mp_state using WRITE_ONCE. Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230419021852.2981107-3-reijiw@google.com --- arch/arm64/kvm/psci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 5767e6baa61a..d046e82e3723 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -110,7 +110,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ smp_wmb(); - vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); out_unlock: -- cgit