From 0ca54b29054151b7a52cbb8904732280afe5a302 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 26 Jun 2018 11:03:18 -0400 Subject: media: rc: be less noisy when driver misbehaves Since commit 48231f289e52 ("media: rc: drivers should produce alternate pulse and space timing events"), on meson-ir we are regularly producing errors. Reduce to warning level and only warn once to avoid flooding the log. A proper fix for meson-ir is going to be too large for v4.18. Signed-off-by: Sean Young Cc: stable@vger.kernel.org # 4.17+ Tested-by: Jerome Brunet Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-ir-raw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/rc/rc-ir-raw.c b/drivers/media/rc/rc-ir-raw.c index 2e0066b1a31c..e7948908e78c 100644 --- a/drivers/media/rc/rc-ir-raw.c +++ b/drivers/media/rc/rc-ir-raw.c @@ -30,13 +30,13 @@ static int ir_raw_event_thread(void *data) while (kfifo_out(&raw->kfifo, &ev, 1)) { if (is_timing_event(ev)) { if (ev.duration == 0) - dev_err(&dev->dev, "nonsensical timing event of duration 0"); + dev_warn_once(&dev->dev, "nonsensical timing event of duration 0"); if (is_timing_event(raw->prev_ev) && !is_transition(&ev, &raw->prev_ev)) - dev_err(&dev->dev, "two consecutive events of type %s", - TO_STR(ev.pulse)); + dev_warn_once(&dev->dev, "two consecutive events of type %s", + TO_STR(ev.pulse)); if (raw->prev_ev.reset && ev.pulse == 0) - dev_err(&dev->dev, "timing event after reset should be pulse"); + dev_warn_once(&dev->dev, "timing event after reset should be pulse"); } list_for_each_entry(handler, &ir_raw_handler_list, list) if (dev->enabled_protocols & -- cgit From 5f3417569165a8ee57654217f73e0160312f409c Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 3 Jul 2018 12:56:03 -0400 Subject: drm/bridge: adv7511: Reset registers on hotplug The bridge loses its hw state when the cable is unplugged. If we detect this case in the hpd handler, reset its state. Reported-by: Rob Clark Tested-by: Rob Clark Reviewed-by: Archit Taneja Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180703165648.120401-1-seanpaul@chromium.org --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 73021b388e12..dd3ff2f2cdce 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -429,6 +429,18 @@ static void adv7511_hpd_work(struct work_struct *work) else status = connector_status_disconnected; + /* + * The bridge resets its registers on unplug. So when we get a plug + * event and we're already supposed to be powered, cycle the bridge to + * restore its state. + */ + if (status == connector_status_connected && + adv7511->connector.status == connector_status_disconnected && + adv7511->powered) { + regcache_mark_dirty(adv7511->regmap); + adv7511_power_on(adv7511); + } + if (adv7511->connector.status != status) { adv7511->connector.status = status; if (status == connector_status_disconnected) -- cgit From d530b5f1ca0bb66958a2b714bebe40a1248b9c15 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Sat, 14 Jul 2018 14:32:12 +0200 Subject: drm: re-enable error handling drm_legacy_ctxbitmap_next() returns idr_alloc() which can return -ENOMEM, -EINVAL or -ENOSPC none of which are -1 . but the call sites of drm_legacy_ctxbitmap_next() seem to be assuming that the error case would be -1 (original return of drm_ctxbitmap_next() prior to 2.6.23 was actually -1). Thus reenable error handling by checking for < 0. Signed-off-by: Nicholas Mc Guire Fixes: 62968144e673 ("drm: convert drm context code to use Linux idr") Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/1531571532-22733-1-git-send-email-hofrat@osadl.org --- drivers/gpu/drm/drm_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c index 3c4000facb36..f973d287696a 100644 --- a/drivers/gpu/drm/drm_context.c +++ b/drivers/gpu/drm/drm_context.c @@ -372,7 +372,7 @@ int drm_legacy_addctx(struct drm_device *dev, void *data, ctx->handle = drm_legacy_ctxbitmap_next(dev); } DRM_DEBUG("%d\n", ctx->handle); - if (ctx->handle == -1) { + if (ctx->handle < 0) { DRM_DEBUG("Not enough free contexts.\n"); /* Should this return -EBUSY instead? */ return -ENOMEM; -- cgit From 46d8c4b28652d35dc6cfb5adf7f54e102fc04384 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Jul 2018 16:12:32 +0800 Subject: crypto: padlock-aes - Fix Nano workaround data corruption This was detected by the self-test thanks to Ard's chunking patch. I finally got around to testing this out on my ancient Via box. It turns out that the workaround got the assembly wrong and we end up doing count + initial cycles of the loop instead of just count. This obviously causes corruption, either by overwriting the source that is yet to be processed, or writing over the end of the buffer. On CPUs that don't require the workaround only ECB is affected. On Nano CPUs both ECB and CBC are affected. This patch fixes it by doing the subtraction prior to the assembly. Fixes: a76c1c23d0c3 ("crypto: padlock-aes - work around Nano CPU...") Cc: Reported-by: Jamie Heilman Signed-off-by: Herbert Xu --- drivers/crypto/padlock-aes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 1c6cbda56afe..09d823d36d3a 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -266,6 +266,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, return; } + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) @@ -273,7 +275,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ : "+S"(input), "+D"(output) - : "d"(control_word), "b"(key), "c"(count - initial)); + : "d"(control_word), "b"(key), "c"(count)); } static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, @@ -284,6 +286,8 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, if (count < cbc_fetch_blocks) return cbc_crypt(input, output, key, iv, control_word, count); + count -= initial; + if (initial) asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) @@ -291,7 +295,7 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ : "+S" (input), "+D" (output), "+a" (iv) - : "d" (control_word), "b" (key), "c" (count-initial)); + : "d" (control_word), "b" (key), "c" (count)); return iv; } -- cgit From 0a06d4256674c4e041945b52044941995fee237d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 19 Jul 2018 10:31:00 -0700 Subject: KVM: vmx: use local variable for current_vmptr when emulating VMPTRST Do not expose the address of vmx->nested.current_vmptr to kvm_write_guest_virt_system() as the resulting __copy_to_user() call will trigger a WARN when CONFIG_HARDENED_USERCOPY is enabled. Opportunistically clean up variable names in handle_vmptrst() to improve readability, e.g. vmcs_gva is misleading as the memory operand of VMPTRST is plain memory, not a VMCS. Signed-off-by: Sean Christopherson Tested-by: Peter Shier Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e30da9a2430c..548bef5359e6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8480,21 +8480,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) /* Emulate the VMPTRST instruction */ static int handle_vmptrst(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - gva_t vmcs_gva; + unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; struct x86_exception e; + gva_t gva; if (!nested_vmx_check_permission(vcpu)) return 1; - if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, true, &vmcs_gva)) + if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) return 1; /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, vmcs_gva, - (void *)&to_vmx(vcpu)->nested.current_vmptr, - sizeof(u64), &e)) { + if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, + sizeof(gpa_t), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } -- cgit From 63aff65573d73eb8dda4732ad4ef222dd35e4862 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 19 Jul 2018 21:59:07 +0300 Subject: kvm: x86: vmx: fix vpid leak VPID for the nested vcpu is allocated at vmx_create_vcpu whenever nested vmx is turned on with the module parameter. However, it's only freed if the L1 guest has executed VMXON which is not a given. As a result, on a system with nested==on every creation+deletion of an L1 vcpu without running an L2 guest results in leaking one vpid. Since the total number of vpids is limited to 64k, they can eventually get exhausted, preventing L2 from starting. Delay allocation of the L2 vpid until VMXON emulation, thus matching its freeing. Fixes: 5c614b3583e7b6dab0c86356fa36c2bcbb8322a0 Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 548bef5359e6..5d8e317c2b04 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7893,6 +7893,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; + vmx->nested.vpid02 = allocate_vpid(); + vmx->nested.vmxon = true; return 0; @@ -10369,11 +10371,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (nested) { + if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, kvm_vcpu_apicv_active(&vmx->vcpu)); - vmx->nested.vpid02 = allocate_vpid(); - } vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -10390,7 +10390,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return &vmx->vcpu; free_vmcs: - free_vpid(vmx->nested.vpid02); free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: kfree(vmx->guest_msrs); -- cgit From 6d20caed9b92d0db005324281e10bb9fd4813a32 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 24 Jul 2018 16:29:06 +0300 Subject: ARC: Add Ofer Levi as plat-eznps maintainer Signed-off-by: Leon Romanovsky Signed-off-by: Vineet Gupta --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0fe4228f78cb..d6d86003fafd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5444,6 +5444,7 @@ F: drivers/iommu/exynos-iommu.c EZchip NPS platform support M: Vineet Gupta +M: Ofer Levi S: Supported F: arch/arc/plat-eznps F: arch/arc/boot/dts/eznps.dts -- cgit From addb8a6559f0f8b5a37582b7ca698358445a55bf Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 11 Jul 2018 11:23:52 +0300 Subject: RDMA/uverbs: Expand primary and alt AV port checks The commit cited below checked that the port numbers provided in the primary and alt AVs are legal. That is sufficient to prevent a kernel panic. However, it is not sufficient for correct operation. In Linux, AVs (both primary and alt) must be completely self-described. We do not accept an AV from userspace without an embedded port number. (This has been the case since kernel 3.14 commit dbf727de7440 ("IB/core: Use GID table in AH creation and dmac resolution")). For the primary AV, this embedded port number must match the port number specified with IB_QP_PORT. We also expect the port number embedded in the alt AV to match the alt_port_num value passed by the userspace driver in the modify_qp command base structure. Add these checks to modify_qp. Cc: # 4.16 Fixes: 5d4c05c3ee36 ("RDMA/uverbs: Sanitize user entered port numbers prior to access it") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 59 +++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cc06e8404e9b..583d3a10b940 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1984,15 +1984,64 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } - if ((cmd->base.attr_mask & IB_QP_AV) && - !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { - ret = -EINVAL; - goto release_qp; + if ((cmd->base.attr_mask & IB_QP_AV)) { + if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) { + ret = -EINVAL; + goto release_qp; + } + + if (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state == IB_QPS_RTR) { + /* We are in INIT->RTR TRANSITION (if we are not, + * this transition will be rejected in subsequent checks). + * In the INIT->RTR transition, we cannot have IB_QP_PORT set, + * but the IB_QP_STATE flag is required. + * + * Since kernel 3.14 (commit dbf727de7440), the uverbs driver, + * when IB_QP_AV is set, has required inclusion of a valid + * port number in the primary AV. (AVs are created and handled + * differently for infiniband and ethernet (RoCE) ports). + * + * Check the port number included in the primary AV against + * the port number in the qp struct, which was set (and saved) + * in the RST->INIT transition. + */ + if (cmd->base.dest.port_num != qp->real_qp->port) { + ret = -EINVAL; + goto release_qp; + } + } else { + /* We are in SQD->SQD. (If we are not, this transition will + * be rejected later in the verbs layer checks). + * Check for both IB_QP_PORT and IB_QP_AV, these can be set + * together in the SQD->SQD transition. + * + * If only IP_QP_AV was set, add in IB_QP_PORT as well (the + * verbs layer driver does not track primary port changes + * resulting from path migration. Thus, in SQD, if the primary + * AV is modified, the primary port should also be modified). + * + * Note that in this transition, the IB_QP_STATE flag + * is not allowed. + */ + if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == (IB_QP_AV | IB_QP_PORT)) && + cmd->base.port_num != cmd->base.dest.port_num) { + ret = -EINVAL; + goto release_qp; + } + if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT)) + == IB_QP_AV) { + cmd->base.attr_mask |= IB_QP_PORT; + cmd->base.port_num = cmd->base.dest.port_num; + } + } } if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) || - !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) { + !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) || + cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) { ret = -EINVAL; goto release_qp; } -- cgit From 603ba2dfb338b307aebe95fe344c479a59b3a175 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 Jul 2018 15:32:15 +0200 Subject: drm/atomic: Check old_plane_state->crtc in drm_atomic_helper_async_check() Async plane update is supposed to work only when updating the FB or FB position of an already enabled plane. That does not apply to requests where the plane was previously disabled or assigned to a different CTRC. Check old_plane_state->crtc value to make sure async plane update is allowed. Fixes: fef9df8b5945 ("drm/atomic: initial support for asynchronous plane update") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180724133215.31917-1-boris.brezillon@bootlin.com --- drivers/gpu/drm/drm_atomic_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 130da5195f3b..ff858b890045 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1527,7 +1527,8 @@ int drm_atomic_helper_async_check(struct drm_device *dev, if (n_planes != 1) return -EINVAL; - if (!new_plane_state->crtc) + if (!new_plane_state->crtc || + old_plane_state->crtc != new_plane_state->crtc) return -EINVAL; funcs = plane->helper_private; -- cgit From de2d8db395c32d121d02871819444b631f73e0b6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 Jul 2018 15:33:00 +0200 Subject: drm/atomic: Initialize variables in drm_atomic_helper_async_check() to make gcc happy drm_atomic_helper_async_check() declares the plane, old_plane_state and new_plane_state variables to iterate over all planes of the atomic state and make sure only one plane is enabled. Unfortunately gcc is not smart enough to figure out that the check on n_planes is enough to guarantee that plane, new_plane_state and old_plane_state are initialized. Explicitly initialize those variables to NULL to make gcc happy. Fixes: fef9df8b5945 ("drm/atomic: initial support for asynchronous plane update") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180724133300.32023-1-boris.brezillon@bootlin.com --- drivers/gpu/drm/drm_atomic_helper.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ff858b890045..81e32199d3ef 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1510,8 +1510,9 @@ int drm_atomic_helper_async_check(struct drm_device *dev, { struct drm_crtc *crtc; struct drm_crtc_state *crtc_state; - struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; + struct drm_plane *plane = NULL; + struct drm_plane_state *old_plane_state = NULL; + struct drm_plane_state *new_plane_state = NULL; const struct drm_plane_helper_funcs *funcs; int i, n_planes = 0; -- cgit From a6a00918d4ad8718c3ccde38c02cec17f116b2fd Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 Jul 2018 15:36:01 +0200 Subject: drm/vc4: Reset ->{x, y}_scaling[1] when dealing with uniplanar formats This is needed to ensure ->is_unity is correct when the plane was previously configured to output a multi-planar format with scaling enabled, and is then being reconfigured to output a uniplanar format. Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180724133601.32114-1-boris.brezillon@bootlin.com --- drivers/gpu/drm/vc4/vc4_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 1d34619eb3fe..a951ec75d01f 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -320,6 +320,9 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->x_scaling[0] = VC4_SCALING_TPZ; if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) vc4_state->y_scaling[0] = VC4_SCALING_TPZ; + } else { + vc4_state->x_scaling[1] = VC4_SCALING_NONE; + vc4_state->y_scaling[1] = VC4_SCALING_NONE; } vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && -- cgit From 92cab799bbc6fa1fca84bd1692285a5f926c17e9 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 4 Jul 2018 10:57:58 -0400 Subject: media: bpf: ensure bpf program is freed on detach Currently we are leaking bpf programs when they are detached from the lirc device; the refcount never reaches zero. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/bpf-lirc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 40826bba06b6..55400317ec53 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -174,6 +174,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog) rcu_assign_pointer(raw->progs, new_array); bpf_prog_array_free(old_array); + bpf_prog_put(prog); unlock: mutex_unlock(&ir_raw_handler_lock); return ret; -- cgit From 4c612add7b18844ddd733ebdcbe754520155999b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 24 Jul 2018 17:13:02 +0300 Subject: ARC: dma [non IOC]: fix arc_dma_sync_single_for_(device|cpu) ARC backend for dma_sync_single_for_(device|cpu) was broken as it was not honoring the @dir argument and simply forcing it based on the call: - arc_dma_sync_single_for_device(dir) assumed DMA_TO_DEVICE (cache wback) - arc_dma_sync_single_for_cpu(dir) assumed DMA_FROM_DEVICE (cache inv) This is not true given the DMA API programming model and has been discussed here [1] in some detail. Interestingly while the deficiency has been there forever, it only started showing up after 4.17 dma common ops rework, commit a8eb92d02dd7 ("arc: fix arc_dma_{map,unmap}_page") which wired up these calls under the more commonly used dma_map_page API triggering the issue. [1]: https://lkml.org/lkml/2018/5/18/979 Fixes: commit a8eb92d02dd7 ("arc: fix arc_dma_{map,unmap}_page") Cc: stable@kernel.org # v4.17+ Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: reworked changelog] --- arch/arc/mm/dma.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 8c1071840979..ec47e6079f5d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -129,14 +129,59 @@ int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } +/* + * Cache operations depending on function and direction argument, inspired by + * https://lkml.org/lkml/2018/5/18/979 + * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] + * dma-mapping: provide a generic dma-noncoherent implementation)" + * + * | map == for_device | unmap == for_cpu + * |---------------------------------------------------------------- + * TO_DEV | writeback writeback | none none + * FROM_DEV | invalidate invalidate | invalidate* invalidate* + * BIDIR | writeback+inv writeback+inv | invalidate invalidate + * + * [*] needed for CPU speculative prefetches + * + * NOTE: we don't check the validity of direction argument as it is done in + * upper layer functions (in include/linux/dma-mapping.h) + */ + void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_cache_wback(paddr, size); + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback(paddr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(paddr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(paddr, size); + break; + + default: + break; + } } void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_cache_inv(paddr, size); + switch (dir) { + case DMA_TO_DEVICE: + break; + + /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv(paddr, size); + break; + + default: + break; + } } -- cgit From eb2777397fd83a4a7eaa26984d09d3babb845d2a Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 16:15:43 +0300 Subject: ARC: dma [non-IOC] setup SMP_CACHE_BYTES and cache_line_size As for today we don't setup SMP_CACHE_BYTES and cache_line_size for ARC, so they are set to L1_CACHE_BYTES by default. L1 line length (L1_CACHE_BYTES) might be easily smaller than L2 line (which is usually the case BTW). This breaks code. For example this breaks ethernet infrastructure on HSDK/AXS103 boards with IOC disabled, involving manual cache flushes Functions which alloc and manage sk_buff packet data area rely on SMP_CACHE_BYTES define. In the result we can share last L2 cache line in sk_buff linear packet data area between DMA buffer and some useful data in other structure. So we can lose this data when we invalidate DMA buffer. sk_buff linear packet data area | | | skb->end skb->tail V | | V V ----------------------------------------------. packet data | | ----------------------------------------------. ---------------------.--------------------------------------------------. SLC line | SLC (L2 cache) line (128B) | ---------------------.--------------------------------------------------. ^ ^ | | These cache lines will be invalidated when we invalidate skb linear packet data area before DMA transaction starting. This leads to issues painful to debug as it reproduces only if (sk_buff->end - sk_buff->tail) < SLC_LINE_SIZE and if we have some useful data right after sk_buff->end. Fix that by hardcode SMP_CACHE_BYTES to max line length we may have. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 3 +++ arch/arc/include/asm/cache.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9cf59fc60eab..5151d81476a1 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -50,6 +50,9 @@ config ARC select HAVE_KERNEL_LZMA select ARCH_HAS_PTE_SPECIAL +config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + config MIGHT_HAVE_PCI bool diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 8486f328cc5d..ff7d3232764a 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -48,7 +48,9 @@ }) /* Largest line length for either L1 or L2 is 128 bytes */ -#define ARCH_DMA_MINALIGN 128 +#define SMP_CACHE_BYTES 128 +#define cache_line_size() SMP_CACHE_BYTES +#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -- cgit From a54e43f993f8ec2f063b616a0e4d2b09e08d78a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 Jul 2018 17:10:39 -0500 Subject: PCI: mobiveil: Avoid integer overflow in IB_WIN_SIZE IB_WIN_SIZE is larger than INT_MAX so we need to cast it to u64. Fixes: 9af6bcb11e12 ("PCI: mobiveil: Add Mobiveil PCIe Host Bridge IP driver") Signed-off-by: Dan Carpenter Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-mobiveil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-mobiveil.c b/drivers/pci/controller/pcie-mobiveil.c index 4d6c20e47bed..cf0aa7cee5b0 100644 --- a/drivers/pci/controller/pcie-mobiveil.c +++ b/drivers/pci/controller/pcie-mobiveil.c @@ -107,7 +107,7 @@ #define CFG_WINDOW_TYPE 0 #define IO_WINDOW_TYPE 1 #define MEM_WINDOW_TYPE 2 -#define IB_WIN_SIZE (256 * 1024 * 1024 * 1024) +#define IB_WIN_SIZE ((u64)256 * 1024 * 1024 * 1024) #define MAX_PIO_WINDOWS 8 /* Parameters for the waiting for link up routine */ -- cgit From f5dbee6e3881b1dbfdcc36008d48bd29549ab2f4 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 28 Jul 2018 05:11:15 -0400 Subject: media: rc: read out of bounds if bpf reports high protocol number The repeat period is read from a static array. If a keydown event is reported from bpf with a high protocol number, we read out of bounds. This is unlikely to end up with a reasonable repeat period at the best of times, in which case no timely key up event is generated. Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 2e222d9ee01f..ca68e1d2b2f9 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -679,6 +679,14 @@ static void ir_timer_repeat(struct timer_list *t) spin_unlock_irqrestore(&dev->keylock, flags); } +static unsigned int repeat_period(int protocol) +{ + if (protocol >= ARRAY_SIZE(protocols)) + return 100; + + return protocols[protocol].repeat_period; +} + /** * rc_repeat() - signals that a key is still pressed * @dev: the struct rc_dev descriptor of the device @@ -691,7 +699,7 @@ void rc_repeat(struct rc_dev *dev) { unsigned long flags; unsigned int timeout = nsecs_to_jiffies(dev->timeout) + - msecs_to_jiffies(protocols[dev->last_protocol].repeat_period); + msecs_to_jiffies(repeat_period(dev->last_protocol)); struct lirc_scancode sc = { .scancode = dev->last_scancode, .rc_proto = dev->last_protocol, .keycode = dev->keypressed ? dev->last_keycode : KEY_RESERVED, @@ -803,7 +811,7 @@ void rc_keydown(struct rc_dev *dev, enum rc_proto protocol, u32 scancode, if (dev->keypressed) { dev->keyup_jiffies = jiffies + nsecs_to_jiffies(dev->timeout) + - msecs_to_jiffies(protocols[protocol].repeat_period); + msecs_to_jiffies(repeat_period(protocol)); mod_timer(&dev->timer_keyup, dev->keyup_jiffies); } spin_unlock_irqrestore(&dev->keylock, flags); -- cgit From 8eb0e6421958e9777db98448a4030d8ae940c9a0 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 27 Jul 2018 13:19:45 -0400 Subject: media: v4l: vsp1: Fix deadlock in VSPDL DRM pipelines The VSP uses a lock to protect the BRU and BRS assignment when configuring pipelines. The lock is taken in vsp1_du_atomic_begin() and released in vsp1_du_atomic_flush(), as well as taken and released in vsp1_du_setup_lif(). This guards against multiple pipelines trying to assign the same BRU and BRS at the same time. The DRM framework calls the .atomic_begin() operations in a loop over all CRTCs included in an atomic commit. On a VSPDL (the only VSP type where this matters), a single VSP instance handles two CRTCs, with a single lock. This results in a deadlock when the .atomic_begin() operation is called on the second CRTC. The DRM framework serializes atomic commits that affect the same CRTCs, but doesn't know about two CRTCs sharing the same VSPDL. Two commits affecting the VSPDL LIF0 and LIF1 respectively can thus race each other, hence the need for a lock. This could be fixed on the DRM side by forcing serialization of commits affecting CRTCs backed by the same VSPDL, but that would negatively affect performances, as the locking is only needed when the BRU and BRS need to be reassigned, which is an uncommon case. The lock protects the whole .atomic_begin() to .atomic_flush() sequence. The only operation that can occur in-between is vsp1_du_atomic_update(), which doesn't touch the BRU and BRS, and thus doesn't need to be protected by the lock. We can thus only take the lock around the pipeline setup calls in vsp1_du_atomic_flush(), which fixes the deadlock. Fixes: f81f9adc4ee1 ("media: v4l: vsp1: Assign BRU and BRS to pipelines dynamically") Signed-off-by: Laurent Pinchart Reviewed-by: Kieran Bingham Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/vsp1/vsp1_drm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index edb35a5c57ea..a99fc0ced7a7 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -728,9 +728,6 @@ EXPORT_SYMBOL_GPL(vsp1_du_setup_lif); */ void vsp1_du_atomic_begin(struct device *dev, unsigned int pipe_index) { - struct vsp1_device *vsp1 = dev_get_drvdata(dev); - - mutex_lock(&vsp1->drm->lock); } EXPORT_SYMBOL_GPL(vsp1_du_atomic_begin); @@ -846,6 +843,7 @@ void vsp1_du_atomic_flush(struct device *dev, unsigned int pipe_index, drm_pipe->crc = cfg->crc; + mutex_lock(&vsp1->drm->lock); vsp1_du_pipeline_setup_inputs(vsp1, pipe); vsp1_du_pipeline_configure(pipe); mutex_unlock(&vsp1->drm->lock); -- cgit From 2c3ee0e1779d2e08bc08734bc8475daaf94d0ba4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 11:41:56 -0300 Subject: tools headers uapi: Update tools's copy of linux/perf_event.h To get the changes in: 6cbc304f2f36 ("perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)") That do not imply any changes in the tooling side, the (ab)use of sample_type is entirely done in kernel space, nothing for userspace to witness here. This cures the following warning during perf's build: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Prashant Bhole Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-o64mjoy35s9gd1gitunw1zg4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b8e288a1f740..eeb787b1c53c 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,6 +143,8 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + + __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, }; /* -- cgit From 7def16d1d2668a4a3663291c9ace307b81934704 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 11:48:19 -0300 Subject: tools headers powerpc: Update asm/unistd.h copy to pick new The new 'io_pgetevents' syscall was wired up in PowerPC in the following cset: b2f82565f2ca ("powerpc: Wire up io_pgetevents") Update tools/arch/powerpc/ copy of the asm/unistd.h file so that 'perf trace' on PowerPC gets it in its syscall table. This elliminated the following perf build warning: Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/unistd.h' differs from latest version at 'arch/powerpc/include/uapi/asm/unistd.h' Cc: Alexander Shishkin Cc: Breno Leitao Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: linuxppc-dev@lists.ozlabs.org Cc: Michael Ellerman Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Richter Link: https://lkml.kernel.org/n/tip-9uvu7tz4ud3bxxfyxwryuz47@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/unistd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h index ac5ba55066dd..985534d0b448 100644 --- a/tools/arch/powerpc/include/uapi/asm/unistd.h +++ b/tools/arch/powerpc/include/uapi/asm/unistd.h @@ -399,5 +399,6 @@ #define __NR_pkey_free 385 #define __NR_pkey_mprotect 386 #define __NR_rseq 387 +#define __NR_io_pgetevents 388 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit From fc73bfd6005c7fe5c3a2f04d4db7fa5d37cd3ebd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 11:56:13 -0300 Subject: tools headers uapi: Refresh linux/bpf.h copy To get the changes in: 4c79579b44b1 ("bpf: Change bpf_fib_lookup to return lookup status") That do not entail changes in tools/perf/ use of it, elliminating the following perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h' Cc: Adrian Hunter Cc: Daniel Borkmann Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yei494y6b3mn6bjzz9g0ws12@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 59b19b6a40d7..b7db3261c62d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1857,7 +1857,8 @@ union bpf_attr { * is resolved), the nexthop address is returned in ipv4_dst * or ipv6_dst based on family, smac is set to mac address of * egress device, dmac is set to nexthop mac address, rt_metric - * is set to metric from route (IPv4/IPv6 only). + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. * * *plen* argument is the size of the passed in struct. * *flags* argument can be a combination of one or more of the @@ -1873,9 +1874,10 @@ union bpf_attr { * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. * Return - * Egress device index on success, 0 if packet needs to continue - * up the stack for further processing or a negative error in case - * of failure. + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * * packet is not forwarded or needs assist from full stack * * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * Description @@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args { #define BPF_FIB_LOOKUP_DIRECT BIT(0) #define BPF_FIB_LOOKUP_OUTPUT BIT(1) +enum { + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +}; + struct bpf_fib_lookup { /* input: network family for lookup (AF_INET, AF_INET6) * output: network family of egress nexthop @@ -2625,7 +2639,11 @@ struct bpf_fib_lookup { /* total length of packet from network header - used for MTU check */ __u16 tot_len; - __u32 ifindex; /* L3 device index for lookup */ + + /* input: L3 device index for lookup + * output: device index from FIB lookup + */ + __u32 ifindex; union { /* inputs to lookup */ -- cgit From 1f27a050fc679d16e68a40e0bb575364a89fad66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 12:26:54 -0300 Subject: tools arch: Update arch/x86/lib/memcpy_64.S copy used in 'perf bench mem memcpy' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To cope with the changes in: 12c89130a56a ("x86/asm/memcpy_mcsafe: Add write-protection-fault handling") 60622d68227d ("x86/asm/memcpy_mcsafe: Return bytes remaining") bd131544aa7e ("x86/asm/memcpy_mcsafe: Add labels for __memcpy_mcsafe() write fault handling") da7bc9c57eb0 ("x86/asm/memcpy_mcsafe: Remove loop unrolling") This needed introducing a file with a copy of the mcsafe_handle_tail() function, that is used in the new memcpy_64.S file, as well as a dummy mcsafe_test.h header. Testing it: $ nm ~/bin/perf | grep mcsafe 0000000000484130 T mcsafe_handle_tail 0000000000484300 T __memcpy_mcsafe $ $ perf bench mem memcpy # Running 'mem/memcpy' benchmark: # function 'default' (Default memcpy() provided by glibc) # Copying 1MB bytes ... 44.389205 GB/sec # function 'x86-64-unrolled' (unrolled memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB bytes ... 22.710756 GB/sec # function 'x86-64-movsq' (movsq-based memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB bytes ... 42.459239 GB/sec # function 'x86-64-movsb' (movsb-based memcpy() in arch/x86/lib/memcpy_64.S) # Copying 1MB bytes ... 42.459239 GB/sec $ This silences this perf tools build warning: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' Cc: Adrian Hunter Cc: Dan Williams Cc: David Ahern Cc: Jiri Olsa Cc: Mika Penttilä Cc: Namhyung Kim Cc: Tony Luck Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-igdpciheradk3gb3qqal52d0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/mcsafe_test.h | 13 ++++ tools/arch/x86/lib/memcpy_64.S | 112 +++++++++++++++---------------- tools/perf/bench/Build | 1 + tools/perf/bench/mem-memcpy-x86-64-asm.S | 1 + tools/perf/bench/mem-memcpy-x86-64-lib.c | 24 +++++++ 5 files changed, 93 insertions(+), 58 deletions(-) create mode 100644 tools/arch/x86/include/asm/mcsafe_test.h create mode 100644 tools/perf/bench/mem-memcpy-x86-64-lib.c diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h new file mode 100644 index 000000000000..2ccd588fbad4 --- /dev/null +++ b/tools/arch/x86/include/asm/mcsafe_test.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_TEST_H_ +#define _MCSAFE_TEST_H_ + +.macro MCSAFE_TEST_CTL +.endm + +.macro MCSAFE_TEST_SRC reg count target +.endm + +.macro MCSAFE_TEST_DST reg count target +.endm +#endif /* _MCSAFE_TEST_H_ */ diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..298ef1479240 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -183,12 +184,15 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_TEST_CTL + /* - * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe_unrolled) +ENTRY(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -204,58 +208,33 @@ ENTRY(memcpy_mcsafe_unrolled) subl $8, %ecx negl %ecx subl %ecx, %edx -.L_copy_leading_bytes: +.L_read_leading_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes + MCSAFE_TEST_DST %rdi 1 .E_leading_bytes +.L_write_leading_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_leading_bytes + jnz .L_read_leading_bytes .L_8byte_aligned: - /* Figure out how many whole cache lines (64-bytes) to copy */ - movl %edx, %ecx - andl $63, %edx - shrl $6, %ecx - jz .L_no_whole_cache_lines - - /* Loop copying whole cache lines */ -.L_cache_w0: movq (%rsi), %r8 -.L_cache_w1: movq 1*8(%rsi), %r9 -.L_cache_w2: movq 2*8(%rsi), %r10 -.L_cache_w3: movq 3*8(%rsi), %r11 - movq %r8, (%rdi) - movq %r9, 1*8(%rdi) - movq %r10, 2*8(%rdi) - movq %r11, 3*8(%rdi) -.L_cache_w4: movq 4*8(%rsi), %r8 -.L_cache_w5: movq 5*8(%rsi), %r9 -.L_cache_w6: movq 6*8(%rsi), %r10 -.L_cache_w7: movq 7*8(%rsi), %r11 - movq %r8, 4*8(%rdi) - movq %r9, 5*8(%rdi) - movq %r10, 6*8(%rdi) - movq %r11, 7*8(%rdi) - leaq 64(%rsi), %rsi - leaq 64(%rdi), %rdi - decl %ecx - jnz .L_cache_w0 - - /* Are there any trailing 8-byte words? */ -.L_no_whole_cache_lines: movl %edx, %ecx andl $7, %edx shrl $3, %ecx jz .L_no_whole_words - /* Copy trailing words */ -.L_copy_trailing_words: +.L_read_words: movq (%rsi), %r8 - mov %r8, (%rdi) - leaq 8(%rsi), %rsi - leaq 8(%rdi), %rdi + MCSAFE_TEST_SRC %rsi 8 .E_read_words + MCSAFE_TEST_DST %rdi 8 .E_write_words +.L_write_words: + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi decl %ecx - jnz .L_copy_trailing_words + jnz .L_read_words /* Any trailing bytes? */ .L_no_whole_words: @@ -264,38 +243,55 @@ ENTRY(memcpy_mcsafe_unrolled) /* Copy trailing bytes */ movl %edx, %ecx -.L_copy_trailing_bytes: +.L_read_trailing_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes + MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes +.L_write_trailing_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_trailing_bytes + jnz .L_read_trailing_bytes /* Copy successful. Return zero */ .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe_unrolled) -EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) +ENDPROC(__memcpy_mcsafe) +EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" - /* Return -EFAULT for any failure */ -.L_memcpy_mcsafe_fail: - mov $-EFAULT, %rax + /* + * Return number of bytes not copied for any failure. Note that + * there is no "tail" handling since the source buffer is 8-byte + * aligned and poison is cacheline aligned. + */ +.E_read_words: + shll $3, %ecx +.E_leading_bytes: + addl %edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax ret + /* + * For write fault handling, given the destination is unaligned, + * we handle faults on multi-byte writes with a byte-by-byte + * copy up to the write-protected page. + */ +.E_write_words: + shll $3, %ecx + addl %edx, %ecx + movl %ecx, %edx + jmp mcsafe_handle_tail + .previous - _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 60bf11943047..eafce1a130a1 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -7,6 +7,7 @@ perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index b43f8d2a34ec..9ad015a1e202 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -6,6 +6,7 @@ #define altinstr_replacement text #define globl p2align 4; .globl #define _ASM_EXTABLE_FAULT(x, y) +#define _ASM_EXTABLE(x, y) #include "../../arch/x86/lib/memcpy_64.S" /* diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c new file mode 100644 index 000000000000..4130734dde84 --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-lib.c @@ -0,0 +1,24 @@ +/* + * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy + * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy' + * happy. + */ +#include + +unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); + +unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++, from++) { + /* + * Call the assembly routine back directly since + * memcpy_mcsafe() may silently fallback to memcpy. + */ + unsigned long rem = __memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} -- cgit From 44fe619b1418ff4e9d2f9518a940fbe2fb686a08 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 30 Jul 2018 13:15:03 -0300 Subject: perf tools: Fix the build on the alpine:edge distro The UAPI file byteorder/little_endian.h uses the __always_inline define without including the header where it is defined, linux/stddef.h, this ends up working in all the other distros because that file gets included seemingly by luck from one of the files included from little_endian.h. But not on Alpine:edge, that fails for all files where perf_event.h is included but linux/stddef.h isn't include before that. Adding the missing linux/stddef.h file where it breaks on Alpine:edge to fix that, in all other distros, that is just a very small header anyway. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-9r1pifftxvuxms8l7ir73p5l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/pmu.c | 1 + tools/perf/arch/x86/util/tsc.c | 1 + tools/perf/perf.h | 1 + tools/perf/util/header.h | 1 + tools/perf/util/namespaces.h | 1 + 5 files changed, 5 insertions(+) diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index 63a74c32ddc5..e33ef5bc31c5 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include "../../util/intel-pt.h" diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 06bae7023a51..950539f9a4f7 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -2,6 +2,7 @@ #include #include +#include #include #include "../../perf.h" diff --git a/tools/perf/perf.h b/tools/perf/perf.h index a1a97956136f..d215714f48df 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -5,6 +5,7 @@ #include #include #include +#include #include extern bool test_attr__enabled; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 90d4577a92dc..6d7fe44aadc0 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -2,6 +2,7 @@ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H +#include #include #include #include diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 760558dcfd18..cae1a9a39722 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -10,6 +10,7 @@ #define __PERF_NAMESPACES_H #include +#include #include #include #include -- cgit From 386177da9e601ed176d54c04324d9ebf44c70620 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 16:15:44 +0300 Subject: ARC: add SMP_CACHE_BYTES value validate Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger or equal to any cache line length by comparing it with values previously read from ARC cache BCR registers. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9dbe645ee127..b95365e1253a 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1246,6 +1246,16 @@ void __init arc_cache_init_master(void) } } + /* + * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger + * or equal to any cache line length. + */ + BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES, + "SMP_CACHE_BYTES must be >= any cache line length"); + if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES)) + panic("L2 Cache line [%d] > kernel Config [%d]\n", + l2_line_sz, SMP_CACHE_BYTES); + /* Note that SLC disable not formally supported till HS 3.0 */ if (is_isa_arcv2() && l2_line_sz && !slc_enable) arc_slc_disable(); -- cgit From 05b466bf846d2e8d2f0baf8dfd81a42cc933e237 Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Sat, 28 Jul 2018 10:54:41 +0300 Subject: ARC: [plat-eznps] Add missing struct nps_host_reg_aux_dpc Fixing compilation issue caused by missing struct nps_host_reg_aux_dpc definition. Fixes: 3f9cd874dcc87 ("ARC: [plat-eznps] avoid toggling of DPC register") Reported-by: Randy Dunlap Signed-off-by: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index 0c7d11022d0f..bd34b96bc591 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -143,6 +143,15 @@ struct nps_host_reg_gim_p_int_dst { }; /* AUX registers definition */ +struct nps_host_reg_aux_dpc { + union { + struct { + u32 ien:1, men:1, hen:1, reserved:29; + }; + u32 value; + }; +}; + struct nps_host_reg_aux_udmc { union { struct { -- cgit From b1f32ce1c3d2c11959b7e6a2c58dc5197c581966 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 29 Jul 2018 11:10:51 -0700 Subject: arc: [plat-eznps] fix data type errors in platform headers Add to fix build errors. Both ctop.h and use u32 types and cause many errors. Examples: ../include/soc/nps/common.h:71:4: error: unknown type name 'u32' u32 __reserved:20, cluster:4, core:4, thread:4; ../include/soc/nps/common.h:76:3: error: unknown type name 'u32' u32 value; ../include/soc/nps/common.h:124:4: error: unknown type name 'u32' u32 base:8, cl_x:4, cl_y:4, ../include/soc/nps/common.h:127:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:83:4: error: unknown type name 'u32' u32 gen:1, gdis:1, clk_gate_dis:1, asb:1, ../arch/arc/plat-eznps/include/plat/ctop.h:86:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:93:4: error: unknown type name 'u32' u32 csa:22, dmsid:6, __reserved:3, cs:1; ../arch/arc/plat-eznps/include/plat/ctop.h:95:3: error: unknown type name 'u32' u32 value; Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index bd34b96bc591..4f6a1673b3a6 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -21,6 +21,7 @@ #error "Incorrect ctop.h include" #endif +#include #include /* core auxiliary registers */ -- cgit From 9e2ea405543d9ddfe05b351f1679e53bd9c11f80 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: [plat-eznps] fix printk warning in arc/plat-eznps/mtm.c Fix printk format warning in arch/arc/plat-eznps/mtm.c: In file included from ../include/linux/printk.h:7, from ../include/linux/kernel.h:14, from ../include/linux/list.h:9, from ../include/linux/smp.h:12, from ../arch/arc/plat-eznps/mtm.c:17: ../arch/arc/plat-eznps/mtm.c: In function 'set_mtm_hs_ctr': ../include/linux/kern_levels.h:5:18: warning: format '%d' expects argument of type 'int', but argument 2 has type 'long int' [-Wformat=] #define KERN_SOH "\001" /* ASCII Start Of Header */ ^~~~~~ ../include/linux/kern_levels.h:11:18: note: in expansion of macro 'KERN_SOH' #define KERN_ERR KERN_SOH "3" /* error conditions */ ^~~~~~~~ ../include/linux/printk.h:308:9: note: in expansion of macro 'KERN_ERR' printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~ ../arch/arc/plat-eznps/mtm.c:166:3: note: in expansion of macro 'pr_err' pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ^~~~~~ ../arch/arc/plat-eznps/mtm.c:166:40: note: format string is defined here pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ~^ %ld The hs_ctr variable can just be int instead of long, so also change kstrtol() to kstrtoint() and leave the format string as %d. Also add 2 header files since they are used in mtm.c and we prefer not to depend on accidental/indirect #includes. Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/mtm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c index 2388de3d09ef..ed0077ef666e 100644 --- a/arch/arc/plat-eznps/mtm.c +++ b/arch/arc/plat-eznps/mtm.c @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include #include @@ -157,10 +159,10 @@ void mtm_enable_core(unsigned int cpu) /* Verify and set the value of the mtm hs counter */ static int __init set_mtm_hs_ctr(char *ctr_str) { - long hs_ctr; + int hs_ctr; int ret; - ret = kstrtol(ctr_str, 0, &hs_ctr); + ret = kstrtoint(ctr_str, 0, &hs_ctr); if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) { pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", -- cgit From 2423665ec53f2a29191b35382075e9834288a975 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: fix build errors in arc/include/asm/delay.h Fix build errors in arch/arc/'s delay.h: - add "extern unsigned long loops_per_jiffy;" - add for "u64" In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:61:12: error: 'u64' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~ In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:63:37: error: 'loops_per_jiffy' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~~~~~~~~~~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/include/asm/delay.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index d5da2115d78a..03d6bb0f4e13 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -17,8 +17,11 @@ #ifndef __ASM_ARC_UDELAY_H #define __ASM_ARC_UDELAY_H +#include #include /* HZ */ +extern unsigned long loops_per_jiffy; + static inline void __delay(unsigned long loops) { __asm__ __volatile__( -- cgit From ec837d620c750c0d4996a907c8c4f7febe1bbeee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: fix type warnings in arc/mm/cache.c Fix type warnings in arch/arc/mm/cache.c. ../arch/arc/mm/cache.c: In function 'flush_anon_page': ../arch/arc/mm/cache.c:1062:55: warning: passing argument 2 of '__flush_dcache_page' makes integer from pointer without a cast [-Wint-conversion] __flush_dcache_page((phys_addr_t)page_address(page), page_address(page)); ^~~~~~~~~~~~~~~~~~ ../arch/arc/mm/cache.c:1013:59: note: expected 'long unsigned int' but argument is of type 'void *' void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) ~~~~~~~~~~~~~~^~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index b95365e1253a..25c631942500 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1038,7 +1038,7 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, unsigned long pfn) { - unsigned int paddr = pfn << PAGE_SHIFT; + phys_addr_t paddr = pfn << PAGE_SHIFT; u_vaddr &= PAGE_MASK; @@ -1058,8 +1058,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long u_vaddr) { /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_page(page_address(page), u_vaddr); - __flush_dcache_page(page_address(page), page_address(page)); + __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr); + __flush_dcache_page((phys_addr_t)page_address(page), + (phys_addr_t)page_address(page)); } -- cgit From 573b3aa6940661dc50c383213d428c27df78be7c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 30 Jul 2018 08:49:03 -0700 Subject: tools/bpftool: fix a percpu_array map dump problem I hit the following problem when I tried to use bpftool to dump a percpu array. $ sudo ./bpftool map show 61: percpu_array name stub flags 0x0 key 4B value 4B max_entries 1 memlock 4096B ... $ sudo ./bpftool map dump id 61 bpftool: malloc.c:2406: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || \ ((unsigned long) (old_size) >= MINSIZE && \ prev_inuse (old_top) && \ ((unsigned long) old_end & (pagesize - 1)) == 0)' failed. Aborted Further debugging revealed that this is due to miscommunication between bpftool and kernel. For example, for the above percpu_array with value size of 4B. The map info returned to user space has value size of 4B. In bpftool, the values array for lookup is allocated like: info->value_size * get_possible_cpus() = 4 * get_possible_cpus() In kernel (kernel/bpf/syscall.c), the values array size is rounded up to multiple of 8. round_up(map->value_size, 8) * num_possible_cpus() = 8 * num_possible_cpus() So when kernel copies the values to user buffer, the kernel will overwrite beyond user buffer boundary. This patch fixed the issue by allocating and stepping through percpu map value array properly in bpftool. Fixes: 71bb428fe2c19 ("tools: bpf: add bpftool") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 097b1a5e046b..f74a8bcbda87 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -90,7 +91,8 @@ static bool map_is_map_of_progs(__u32 type) static void *alloc_value(struct bpf_map_info *info) { if (map_is_per_cpu(info->type)) - return malloc(info->value_size * get_possible_cpus()); + return malloc(round_up(info->value_size, 8) * + get_possible_cpus()); else return malloc(info->value_size); } @@ -161,9 +163,10 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, jsonw_name(json_wtr, "value"); print_hex_data_json(value, info->value_size); } else { - unsigned int i, n; + unsigned int i, n, step; n = get_possible_cpus(); + step = round_up(info->value_size, 8); jsonw_name(json_wtr, "key"); print_hex_data_json(key, info->key_size); @@ -176,7 +179,7 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, jsonw_int_field(json_wtr, "cpu", i); jsonw_name(json_wtr, "value"); - print_hex_data_json(value + i * info->value_size, + print_hex_data_json(value + i * step, info->value_size); jsonw_end_object(json_wtr); @@ -207,9 +210,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, printf("\n"); } else { - unsigned int i, n; + unsigned int i, n, step; n = get_possible_cpus(); + step = round_up(info->value_size, 8); printf("key:\n"); fprint_hex(stdout, key, info->key_size, " "); @@ -217,7 +221,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, for (i = 0; i < n; i++) { printf("value (CPU %02d):%c", i, info->value_size > 16 ? '\n' : ' '); - fprint_hex(stdout, value + i * info->value_size, + fprint_hex(stdout, value + i * step, info->value_size, " "); printf("\n"); } -- cgit From 2d55d614fcf58187e2937dba11643b9471cd64d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 27 Jul 2018 20:20:08 -0700 Subject: net: xsk: don't return frames via the allocator on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_return_buff() is used when frame has been successfully handled (transmitted) or if an error occurred during delayed processing and there is no way to report it back to xdp_do_redirect(). In case of __xsk_rcv_zc() error is propagated all the way back to the driver, so there is no need to call xdp_return_buff(). Driver will recycle the frame anyway after seeing that error happened. Fixes: 173d3adb6f43 ("xsk: add zero-copy support for Rx") Signed-off-by: Jakub Kicinski Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 72335c2e8108..4e937cd7c17d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -84,10 +84,8 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len); - if (err) { - xdp_return_buff(xdp); + if (err) xs->rx_dropped++; - } return err; } -- cgit From 156c8b58ef5cfd97245928c95669fd4cb0f9c388 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Jul 2018 08:28:08 -0400 Subject: perf/x86/intel/uncore: Fix hardcoded index of Broadwell extra PCI devices Masayoshi Mizuma reported that a warning message is shown while a CPU is hot-removed on Broadwell servers: WARNING: CPU: 126 PID: 6 at arch/x86/events/intel/uncore.c:988 uncore_pci_remove+0x10b/0x150 Call Trace: pci_device_remove+0x42/0xd0 device_release_driver_internal+0x148/0x220 pci_stop_bus_device+0x76/0xa0 pci_stop_root_bus+0x44/0x60 acpi_pci_root_remove+0x1f/0x80 acpi_bus_trim+0x57/0x90 acpi_bus_trim+0x2e/0x90 acpi_device_hotplug+0x2bc/0x4b0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x174/0x3a0 worker_thread+0x4c/0x3d0 kthread+0xf8/0x130 This bug was introduced by: commit 15a3e845b01c ("perf/x86/intel/uncore: Fix SBOX support for Broadwell CPUs") The index of "QPI Port 2 filter" was hardcode to 2, but this conflicts with the index of "PCU.3" which is "HSWEP_PCI_PCU_3", which equals to 2 as well. To fix the conflict, the hardcoded index needs to be cleaned up: - introduce a new enumerator "BDX_PCI_QPI_PORT2_FILTER" for "QPI Port 2 filter" on Broadwell, - increase UNCORE_EXTRA_PCI_DEV_MAX by one, - clean up the hardcoded index. Debugged-by: Masayoshi Mizuma Suggested-by: Ingo Molnar Reported-by: Masayoshi Mizuma Tested-by: Masayoshi Mizuma Signed-off-by: Kan Liang Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: msys.mizuma@gmail.com Cc: stable@vger.kernel.org Fixes: 15a3e845b01c ("perf/x86/intel/uncore: Fix SBOX support for Broadwell CPUs") Link: http://lkml.kernel.org/r/1532953688-15008-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.h | 2 +- arch/x86/events/intel/uncore_snbep.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index c9e1e0bef3c3..e17ab885b1e9 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -28,7 +28,7 @@ #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff -#define UNCORE_EXTRA_PCI_DEV_MAX 3 +#define UNCORE_EXTRA_PCI_DEV_MAX 4 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 87dc0263a2e1..51d7c117e3c7 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1029,6 +1029,7 @@ void snbep_uncore_cpu_init(void) enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, + BDX_PCI_QPI_PORT2_FILTER, HSWEP_PCI_PCU_3, }; @@ -3286,15 +3287,18 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { }, { /* QPI Port 0 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), }, { /* QPI Port 1 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), }, { /* QPI Port 2 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + BDX_PCI_QPI_PORT2_FILTER), }, { /* PCU.3 (for Capability registers) */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), -- cgit From c7513c2a2714204d3588ecaa170ae628fd0d217e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 27 Jul 2018 14:59:15 +0200 Subject: crypto/arm64: aes-ce-gcm - add missing kernel_neon_begin/end pair Calling pmull_gcm_encrypt_block() requires kernel_neon_begin() and kernel_neon_end() to be used since the routine touches the NEON register file. Add the missing calls. Also, since NEON register contents are not preserved outside of a kernel mode NEON region, pass the key schedule array again. Fixes: 7c50136a8aba ("crypto: arm64/aes-ghash - yield NEON after every ...") Acked-by: Herbert Xu Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/crypto/ghash-ce-glue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 7cf0b1aa6ea8..8a10f1d7199a 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -488,9 +488,13 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes) - pmull_gcm_encrypt_block(iv, iv, NULL, + if (walk.nbytes) { + kernel_neon_begin(); + pmull_gcm_encrypt_block(iv, iv, ctx->aes_key.key_enc, num_rounds(&ctx->aes_key)); + kernel_neon_end(); + } + } else { __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, num_rounds(&ctx->aes_key)); -- cgit From 44bda4b7d26e9fffed6d7152d98a2e9edaeb2a76 Mon Sep 17 00:00:00 2001 From: Hari Vyas Date: Tue, 3 Jul 2018 14:35:41 +0530 Subject: PCI: Fix is_added/is_busmaster race condition When a PCI device is detected, pdev->is_added is set to 1 and proc and sysfs entries are created. When the device is removed, pdev->is_added is checked for one and then device is detached with clearing of proc and sys entries and at end, pdev->is_added is set to 0. is_added and is_busmaster are bit fields in pci_dev structure sharing same memory location. A strange issue was observed with multiple removal and rescan of a PCIe NVMe device using sysfs commands where is_added flag was observed as zero instead of one while removing device and proc,sys entries are not cleared. This causes issue in later device addition with warning message "proc_dir_entry" already registered. Debugging revealed a race condition between the PCI core setting the is_added bit in pci_bus_add_device() and the NVMe driver reset work-queue setting the is_busmaster bit in pci_set_master(). As these fields are not handled atomically, that clears the is_added bit. Move the is_added bit to a separate private flag variable and use atomic functions to set and retrieve the device addition state. This avoids the race because is_added no longer shares a memory location with is_busmaster. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200283 Signed-off-by: Hari Vyas Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Acked-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 4 +++- arch/powerpc/platforms/powernv/pci-ioda.c | 3 ++- arch/powerpc/platforms/pseries/setup.c | 3 ++- drivers/pci/bus.c | 6 +++--- drivers/pci/hotplug/acpiphp_glue.c | 2 +- drivers/pci/pci.h | 11 +++++++++++ drivers/pci/probe.c | 4 ++-- drivers/pci/remove.c | 5 +++-- include/linux/pci.h | 1 - 9 files changed, 27 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index fe9733ffffaa..471aac313b89 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -42,6 +42,8 @@ #include #include +#include "../../../drivers/pci/pci.h" + /* hose_spinlock protects accesses to the the phb_bitmap. */ static DEFINE_SPINLOCK(hose_spinlock); LIST_HEAD(hose_list); @@ -1014,7 +1016,7 @@ void pcibios_setup_bus_devices(struct pci_bus *bus) /* Cardbus can call us to add new devices to a bus, so ignore * those who are already fully discovered */ - if (dev->is_added) + if (pci_dev_is_added(dev)) continue; pcibios_setup_device(dev); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5bd0eb6681bc..70b2e1e0f23c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -46,6 +46,7 @@ #include "powernv.h" #include "pci.h" +#include "../../../../drivers/pci/pci.h" #define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ @@ -3138,7 +3139,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; pdn = pci_get_pdn(pdev); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 139f0af6c3d9..8a4868a3964b 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,7 @@ #include #include "pseries.h" +#include "../../../../drivers/pci/pci.h" int CMO_PrPSP = -1; int CMO_SecPSP = -1; @@ -664,7 +665,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) const int *indexes; struct device_node *dn = pci_device_to_OF_node(pdev); - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 35b7fc87eac5..5cb40b2518f9 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -330,7 +330,7 @@ void pci_bus_add_device(struct pci_dev *dev) return; } - dev->is_added = 1; + pci_dev_assign_added(dev, true); } EXPORT_SYMBOL_GPL(pci_bus_add_device); @@ -347,14 +347,14 @@ void pci_bus_add_devices(const struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { /* Skip already-added devices */ - if (dev->is_added) + if (pci_dev_is_added(dev)) continue; pci_bus_add_device(dev); } list_for_each_entry(dev, &bus->devices, bus_list) { /* Skip if device attach failed */ - if (!dev->is_added) + if (!pci_dev_is_added(dev)) continue; child = dev->subordinate; if (child) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 3a17b290df5d..ef0b1b6ba86f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -509,7 +509,7 @@ static void enable_slot(struct acpiphp_slot *slot) list_for_each_entry(dev, &bus->devices, bus_list) { /* Assume that newly added devices are powered on already. */ - if (!dev->is_added) + if (!pci_dev_is_added(dev)) dev->current_state = PCI_D0; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 882f1f9596df..08817253c8a2 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -288,6 +288,7 @@ struct pci_sriov { /* pci_dev priv_flags */ #define PCI_DEV_DISCONNECTED 0 +#define PCI_DEV_ADDED 1 static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) { @@ -300,6 +301,16 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags); } +static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) +{ + assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added); +} + +static inline bool pci_dev_is_added(const struct pci_dev *dev) +{ + return test_bit(PCI_DEV_ADDED, &dev->priv_flags); +} + #ifdef CONFIG_PCI_ATS void pci_restore_ats_state(struct pci_dev *dev); #else diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ac876e32de4b..611adcd9c169 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2433,13 +2433,13 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) dev = pci_scan_single_device(bus, devfn); if (!dev) return 0; - if (!dev->is_added) + if (!pci_dev_is_added(dev)) nr++; for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) { dev = pci_scan_single_device(bus, devfn + fn); if (dev) { - if (!dev->is_added) + if (!pci_dev_is_added(dev)) nr++; dev->multifunction = 1; } diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 6f072eae4f7a..5e3d0dced2b8 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -19,11 +19,12 @@ static void pci_stop_dev(struct pci_dev *dev) { pci_pme_active(dev, false); - if (dev->is_added) { + if (pci_dev_is_added(dev)) { device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); - dev->is_added = 0; + + pci_dev_assign_added(dev, false); } if (dev->bus->self) diff --git a/include/linux/pci.h b/include/linux/pci.h index abd5d5e17aee..c133ccfa002e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -368,7 +368,6 @@ struct pci_dev { unsigned int transparent:1; /* Subtractive decode bridge */ unsigned int multifunction:1; /* Multi-function device */ - unsigned int is_added:1; unsigned int is_busmaster:1; /* Is busmaster */ unsigned int no_msi:1; /* May not use MSI */ unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ -- cgit From 80d20d35af1edd632a5e7a3b9c0ab7ceff92769e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 31 Jul 2018 18:13:58 +0200 Subject: nohz: Fix local_timer_softirq_pending() local_timer_softirq_pending() checks whether the timer softirq is pending with: local_softirq_pending() & TIMER_SOFTIRQ. This is wrong because TIMER_SOFTIRQ is the softirq number and not a bitmask. So the test checks for the wrong bit. Use BIT(TIMER_SOFTIRQ) instead. Fixes: 5d62c183f9e9 ("nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()") Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Paul E. McKenney Reviewed-by: Daniel Bristot de Oliveira Acked-by: Frederic Weisbecker Cc: bigeasy@linutronix.de Cc: peterz@infradead.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180731161358.29472-1-anna-maria@linutronix.de --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index da9455a6b42b..5b33e2f5c0ed 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -642,7 +642,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static inline bool local_timer_softirq_pending(void) { - return local_softirq_pending() & TIMER_SOFTIRQ; + return local_softirq_pending() & BIT(TIMER_SOFTIRQ); } static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) -- cgit From cca19f0b684f4ed6aabf6ad07ae3e15e77bfd78a Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 31 Jul 2018 15:24:52 +0200 Subject: powerpc/64s/radix: Fix missing global invalidations when removing copro With the optimizations for TLB invalidation from commit 0cef77c7798a ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask"), the scope of a TLBI (global vs. local) can now be influenced by the value of the 'copros' counter of the memory context. When calling mm_context_remove_copro(), the 'copros' counter is decremented first before flushing. It may have the unintended side effect of sending local TLBIs when we explicitly need global invalidations in this case. Thus breaking any nMMU user in a bad and unpredictable way. Fix it by flushing first, before updating the 'copros' counter, so that invalidations will be global. Fixes: 0cef77c7798a ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") Signed-off-by: Frederic Barrat Reviewed-by: Nicholas Piggin Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 79d570cbf332..b2f89b621b15 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -143,24 +143,33 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) { int c; - c = atomic_dec_if_positive(&mm->context.copros); - - /* Detect imbalance between add and remove */ - WARN_ON(c < 0); - /* - * Need to broadcast a global flush of the full mm before - * decrementing active_cpus count, as the next TLBI may be - * local and the nMMU and/or PSL need to be cleaned up. - * Should be rare enough so that it's acceptable. + * When removing the last copro, we need to broadcast a global + * flush of the full mm, as the next TLBI may be local and the + * nMMU and/or PSL need to be cleaned up. + * + * Both the 'copros' and 'active_cpus' counts are looked at in + * flush_all_mm() to determine the scope (local/global) of the + * TLBIs, so we need to flush first before decrementing + * 'copros'. If this API is used by several callers for the + * same context, it can lead to over-flushing. It's hopefully + * not common enough to be a problem. * * Skip on hash, as we don't know how to do the proper flush * for the time being. Invalidations will remain global if - * used on hash. + * used on hash. Note that we can't drop 'copros' either, as + * it could make some invalidations local with no flush + * in-between. */ - if (c == 0 && radix_enabled()) { + if (radix_enabled()) { flush_all_mm(mm); - dec_mm_active_cpus(mm); + + c = atomic_dec_if_positive(&mm->context.copros); + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + + if (c == 0) + dec_mm_active_cpus(mm); } } #else -- cgit From 6ea76bf51339506e9fb00d6caebf5d6b42a571e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Jul 2018 22:21:22 -0400 Subject: NFSv4: Fix _nfs4_do_setlk() The patch to fix the case where a lock request was interrupted ended up changing default handling of errors such as NFS4ERR_DENIED and caused the client to immediately resend the lock request. Let's do a partial revert of that request so that the default is now to exit, but change the way we handle resends to take into account the fact that the user may have interrupted the request. Reported-by: Kenneth Johansson Fixes: a3cf9bca2ace ("NFSv4: Don't add a new lock on an interrupted wait..") Cc: Benjamin Coddington Cc: Jeff Layton Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- fs/nfs/nfs4proc.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6dd146885da9..f6c4ccd693f4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6466,34 +6466,34 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) - break; + goto out_restart; } - if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - goto out_done; - } else if (nfs4_update_lock_stateid(lsp, &data->res.stateid)) - goto out_done; - + } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_restart; break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: if (data->arg.new_lock_owner != 0) { - if (nfs4_stateid_match(&data->arg.open_stateid, + if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) - goto out_done; - } else if (nfs4_stateid_match(&data->arg.lock_stateid, + goto out_restart; + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) - goto out_done; + goto out_restart; } - if (!data->cancelled) - rpc_restart_call_prepare(task); out_done: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + return; +out_restart: + if (!data->cancelled) + rpc_restart_call_prepare(task); + goto out_done; } static void nfs4_lock_release(void *calldata) @@ -6502,7 +6502,7 @@ static void nfs4_lock_release(void *calldata) dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); - if (data->cancelled) { + if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); -- cgit From 1b3a62643660020cdc68e6139a010c06e8fc96c7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 1 Aug 2018 16:32:25 +0300 Subject: x86/boot/compressed/64: Validate trampoline placement against E820 There were two report of boot failure cased by trampoline placed into a reserved memory region. It can happen on machines that don't report EBDA correctly. Fix the problem by re-validating the found address against the E820 table. If the address is in a reserved area, find the next usable region below the initial address. Fixes: 3548e131ec6a ("x86/boot/compressed/64: Find a place for 32-bit trampoline") Reported-by: Dmitry Malkin Reported-by: youling 257 Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180801133225.38121-1-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/pgtable_64.c | 73 ++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 8c5107545251..9e2157371491 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,3 +1,4 @@ +#include #include #include "pgtable.h" #include "../string.h" @@ -34,10 +35,62 @@ unsigned long *trampoline_32bit __section(.data); extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); +static unsigned long find_trampoline_placement(void) +{ + unsigned long bios_start, ebda_start; + unsigned long trampoline_start; + struct boot_e820_entry *entry; + int i; + + /* + * Find a suitable spot for the trampoline. + * This code is based on reserve_bios_regions(). + */ + + ebda_start = *(unsigned short *)0x40e << 4; + bios_start = *(unsigned short *)0x413 << 10; + + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) + bios_start = BIOS_START_MAX; + + if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; + + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Find the first usable memory region under bios_start. */ + for (i = boot_params->e820_entries - 1; i >= 0; i--) { + entry = &boot_params->e820_table[i]; + + /* Skip all entries above bios_start. */ + if (bios_start <= entry->addr) + continue; + + /* Skip non-RAM entries. */ + if (entry->type != E820_TYPE_RAM) + continue; + + /* Adjust bios_start to the end of the entry if needed. */ + if (bios_start > entry->addr + entry->size) + bios_start = entry->addr + entry->size; + + /* Keep bios_start page-aligned. */ + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Skip the entry if it's too small. */ + if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr) + continue; + + break; + } + + /* Place the trampoline just below the end of low memory */ + return bios_start - TRAMPOLINE_32BIT_SIZE; +} + struct paging_config paging_prepare(void *rmode) { struct paging_config paging_config = {}; - unsigned long bios_start, ebda_start; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ boot_params = rmode; @@ -61,23 +114,7 @@ struct paging_config paging_prepare(void *rmode) paging_config.l5_required = 1; } - /* - * Find a suitable spot for the trampoline. - * This code is based on reserve_bios_regions(). - */ - - ebda_start = *(unsigned short *)0x40e << 4; - bios_start = *(unsigned short *)0x413 << 10; - - if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) - bios_start = BIOS_START_MAX; - - if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) - bios_start = ebda_start; - - /* Place the trampoline just below the end of low memory, aligned to 4k */ - paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE; - paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE); + paging_config.trampoline_start = find_trampoline_placement(); trampoline_32bit = (unsigned long *)paging_config.trampoline_start; -- cgit From 258fe208f2829d75ac837c17dbdc697ef653a395 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Thu, 2 Aug 2018 15:27:27 +0530 Subject: selftest/net: fix protocol family to work for IPv4. use actual protocol family passed by user rather than hardcoded AF_INTE6 to cerate sockets. current code is not working for IPv4. Signed-off-by: Maninder Singh Signed-off-by: Vaneet Narang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 77f762780199..e8c5dff448eb 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -402,7 +402,7 @@ int main(int argc, char *argv[]) exit(1); } - fd = socket(AF_INET6, SOCK_STREAM, 0); + fd = socket(cfg_family, SOCK_STREAM, 0); if (fd == -1) { perror("socket"); exit(1); -- cgit From 79b3dbe4adb3420e74cf755b4beb5d2b43d5928d Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Aug 2018 13:09:27 -0700 Subject: fs: fix iomap_bmap position calculation The position calculation in iomap_bmap() shifts bno the wrong way, so we don't progress properly and end up re-mapping block zero over and over, yielding an unchanging physical block range as the logical block advances: # filefrag -Be file ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 0: 21.. 21: 1: merged 1: 1.. 1: 21.. 21: 1: 22: merged Discontinuity: Block 1 is at 21 (was 22) 2: 2.. 2: 21.. 21: 1: 22: merged Discontinuity: Block 2 is at 21 (was 22) 3: 3.. 3: 21.. 21: 1: 22: merged This breaks the FIBMAP interface for anyone using it (XFS), which in turn breaks LILO, zipl, etc. Bug-actually-spotted-by: Darrick J. Wong Fixes: 89eb1906a953 ("iomap: add an iomap-based bmap implementation") Cc: stable@vger.kernel.org Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/iomap.c b/fs/iomap.c index 77397b5a96ef..0d0bd8845586 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1443,7 +1443,7 @@ iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops) { struct inode *inode = mapping->host; - loff_t pos = bno >> inode->i_blkbits; + loff_t pos = bno << inode->i_blkbits; unsigned blocksize = i_blocksize(inode); if (filemap_write_and_wait(mapping)) -- cgit From 2d5ba0e2de24ec87636244a01d4e78d095cc1b20 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 3 Aug 2018 01:49:37 +0800 Subject: blk-mq: fix blk_mq_tagset_busy_iter Commit d250bf4e776ff09d5("blk-mq: only iterate over inflight requests in blk_mq_tagset_busy_iter") uses 'blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT' to replace 'blk_mq_request_started(req)', this way is wrong, and causes lots of test system hang during booting. Fix the issue by using blk_mq_request_started(req) inside bt_tags_iter(). Fixes: d250bf4e776ff09d5 ("blk-mq: only iterate over inflight requests in blk_mq_tagset_busy_iter") Cc: Josef Bacik Cc: Christoph Hellwig Cc: Guenter Roeck Cc: Mark Brown Cc: Matt Hart Cc: Johannes Thumshirn Cc: John Garry Cc: Hannes Reinecke , Cc: "Martin K. Petersen" , Cc: James Bottomley Cc: linux-scsi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Bart Van Assche Tested-by: Guenter Roeck Reported-by: Mark Brown Reported-by: Guenter Roeck Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 09b2ee6694fb..3de0836163c2 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -271,7 +271,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) * test and set the bit before assining ->rqs[]. */ rq = tags->rqs[bitnr]; - if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) + if (rq && blk_mq_request_started(rq)) iter_data->fn(rq, iter_data->data, reserved); return true; -- cgit From afb41bb039656f0cecb54eeb8b2e2088201295f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Aug 2018 18:22:41 +0100 Subject: drivers: net: lmc: fix case value for target abort error Current value for a target abort error is 0x010, however, this value should in fact be 0x002. As it stands, the range of error is 0..7 so it is currently never being detected. This bug has been in the driver since the early 2.6.12 days (or before). Detected by CoverityScan, CID#744290 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wan/lmc/lmc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 90a4ad9a2d08..b3a1b6f5c406 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1362,7 +1362,7 @@ static irqreturn_t lmc_interrupt (int irq, void *dev_instance) /*fold00*/ case 0x001: printk(KERN_WARNING "%s: Master Abort (naughty)\n", dev->name); break; - case 0x010: + case 0x002: printk(KERN_WARNING "%s: Target Abort (not so naughty)\n", dev->name); break; default: -- cgit From 7e97de0b033bcac4fa9a35cef72e0c06e6a22c67 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 2 Aug 2018 15:36:01 -0700 Subject: memcg: remove memcg_cgroup::id from IDR on mem_cgroup_css_alloc() failure In case of memcg_online_kmem() failure, memcg_cgroup::id remains hashed in mem_cgroup_idr even after memcg memory is freed. This leads to leak of ID in mem_cgroup_idr. This patch adds removal into mem_cgroup_css_alloc(), which fixes the problem. For better readability, it adds a generic helper which is used in mem_cgroup_alloc() and mem_cgroup_id_put_many() as well. Link: http://lkml.kernel.org/r/152354470916.22460.14397070748001974638.stgit@localhost.localdomain Fixes 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") Signed-off-by: Kirill Tkhai Acked-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8c0280b3143e..b2173f7e5164 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4037,6 +4037,14 @@ static struct cftype mem_cgroup_legacy_files[] = { static DEFINE_IDR(mem_cgroup_idr); +static void mem_cgroup_id_remove(struct mem_cgroup *memcg) +{ + if (memcg->id.id > 0) { + idr_remove(&mem_cgroup_idr, memcg->id.id); + memcg->id.id = 0; + } +} + static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0); @@ -4047,8 +4055,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { VM_BUG_ON(atomic_read(&memcg->id.ref) < n); if (atomic_sub_and_test(n, &memcg->id.ref)) { - idr_remove(&mem_cgroup_idr, memcg->id.id); - memcg->id.id = 0; + mem_cgroup_id_remove(memcg); /* Memcg ID pins CSS */ css_put(&memcg->css); @@ -4185,8 +4192,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); return memcg; fail: - if (memcg->id.id > 0) - idr_remove(&mem_cgroup_idr, memcg->id.id); + mem_cgroup_id_remove(memcg); __mem_cgroup_free(memcg); return NULL; } @@ -4245,6 +4251,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; fail: + mem_cgroup_id_remove(memcg); mem_cgroup_free(memcg); return ERR_PTR(-ENOMEM); } -- cgit From eec3636ad198d4ac61e574cb122cb67e9bef5492 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Thu, 2 Aug 2018 15:36:05 -0700 Subject: ipc/shm.c add ->pagesize function to shm_vm_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 05ea88608d4e ("mm, hugetlbfs: introduce ->pagesize() to vm_operations_struct") adds a new ->pagesize() function to hugetlb_vm_ops, intended to cover all hugetlbfs backed files. With System V shared memory model, if "huge page" is specified, the "shared memory" is backed by hugetlbfs files, but the mappings initiated via shmget/shmat have their original vm_ops overwritten with shm_vm_ops, so we need to add a ->pagesize function to shm_vm_ops. Otherwise, vma_kernel_pagesize() returns PAGE_SIZE given a hugetlbfs backed vma, result in below BUG: fs/hugetlbfs/inode.c 443 if (unlikely(page_mapped(page))) { 444 BUG_ON(truncate_op); resulting in hugetlbfs: oracle (4592): Using mlock ulimits for SHM_HUGETLB is deprecated ------------[ cut here ]------------ kernel BUG at fs/hugetlbfs/inode.c:444! Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 ... CPU: 35 PID: 5583 Comm: oracle_5583_sbt Not tainted 4.14.35-1829.el7uek.x86_64 #2 RIP: 0010:remove_inode_hugepages+0x3db/0x3e2 .... Call Trace: hugetlbfs_evict_inode+0x1e/0x3e evict+0xdb/0x1af iput+0x1a2/0x1f7 dentry_unlink_inode+0xc6/0xf0 __dentry_kill+0xd8/0x18d dput+0x1b5/0x1ed __fput+0x18b/0x216 ____fput+0xe/0x10 task_work_run+0x90/0xa7 exit_to_usermode_loop+0xdd/0x116 do_syscall_64+0x187/0x1ae entry_SYSCALL_64_after_hwframe+0x150/0x0 [jane.chu@oracle.com: relocate comment] Link: http://lkml.kernel.org/r/20180731044831.26036-1-jane.chu@oracle.com Link: http://lkml.kernel.org/r/20180727211727.5020-1-jane.chu@oracle.com Fixes: 05ea88608d4e13 ("mm, hugetlbfs: introduce ->pagesize() to vm_operations_struct") Signed-off-by: Jane Chu Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Acked-by: Davidlohr Bueso Acked-by: Michal Hocko Cc: Dan Williams Cc: Jan Kara Cc: Jérôme Glisse Cc: Manfred Spraul Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/shm.c | 12 ++++++++++++ mm/hugetlb.c | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/ipc/shm.c b/ipc/shm.c index 051a3e1fb8df..fefa00d310fb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -427,6 +427,17 @@ static int shm_split(struct vm_area_struct *vma, unsigned long addr) return 0; } +static unsigned long shm_pagesize(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + if (sfd->vm_ops->pagesize) + return sfd->vm_ops->pagesize(vma); + + return PAGE_SIZE; +} + #ifdef CONFIG_NUMA static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) { @@ -554,6 +565,7 @@ static const struct vm_operations_struct shm_vm_ops = { .close = shm_close, /* callback for when the vm-area is released */ .fault = shm_fault, .split = shm_split, + .pagesize = shm_pagesize, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 039ddbc574e9..3103099f64fd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3167,6 +3167,13 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf) return 0; } +/* + * When a new function is introduced to vm_operations_struct and added + * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops. + * This is because under System V memory model, mappings created via + * shmget/shmat with "huge page" specified are backed by hugetlbfs files, + * their original vm_ops are overwritten with shm_vm_ops. + */ const struct vm_operations_struct hugetlb_vm_ops = { .fault = hugetlb_vm_op_fault, .open = hugetlb_vm_op_open, -- cgit From 31e810aa1033a7db50a2746cd34a2432237f6420 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Aug 2018 15:36:09 -0700 Subject: userfaultfd: remove uffd flags from vma->vm_flags if UFFD_EVENT_FORK fails The fix in commit 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") cleared the vma->vm_userfaultfd_ctx but kept userfaultfd flags in vma->vm_flags that were copied from the parent process VMA. As the result, there is an inconsistency between the values of vma->vm_userfaultfd_ctx.ctx and vma->vm_flags which triggers BUG_ON in userfaultfd_release(). Clearing the uffd flags from vma->vm_flags in case of UFFD_EVENT_FORK failure resolves the issue. Link: http://lkml.kernel.org/r/1532931975-25473-1-git-send-email-rppt@linux.vnet.ibm.com Fixes: 0cbb4b4f4c44 ("userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails") Signed-off-by: Mike Rapoport Reported-by: syzbot+121be635a7a35ddb7dcb@syzkaller.appspotmail.com Cc: Andrea Arcangeli Cc: Eric Biggers Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 594d192b2331..bad9cea37f12 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -633,8 +633,10 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, /* the various vma->vm_userfaultfd_ctx still points to it */ down_write(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING); + } up_write(&mm->mmap_sem); userfaultfd_ctx_put(release_new_ctx); -- cgit From 8c85cbdf371f9ddf256ecc5d9548b26ee8fcfe2f Mon Sep 17 00:00:00 2001 From: Mathieu Xhonneux Date: Wed, 1 Aug 2018 15:34:54 +0000 Subject: selftests/bpf: update test_lwt_seg6local.sh according to iproute2 The shell file for test_lwt_seg6local contains an early iproute2 syntax for installing a seg6local End.BPF route. iproute2 support for this feature has recently been upstreamed, but with an additional keyword required. This patch updates test_lwt_seg6local.sh to the definitive iproute2 syntax Signed-off-by: Mathieu Xhonneux Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_lwt_seg6local.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 270fa8f49573..785eabf2a593 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -115,14 +115,14 @@ ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4 +ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 -ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 ip netns exec ns4 ip -6 addr add fc42::1 dev lo ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87 ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8 +ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo -- cgit From d1f0301b3333eef5efbfa1fe0f0edbea01863d5d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Aug 2018 14:44:59 +0200 Subject: genirq: Make force irq threading setup more robust The support of force threading interrupts which are set up with both a primary and a threaded handler wreckaged the setup of regular requested threaded interrupts (primary handler == NULL). The reason is that it does not check whether the primary handler is set to the default handler which wakes the handler thread. Instead it replaces the thread handler with the primary handler as it would do with force threaded interrupts which have been requested via request_irq(). So both the primary and the thread handler become the same which then triggers the warnon that the thread handler tries to wakeup a not configured secondary thread. Fortunately this only happens when the driver omits the IRQF_ONESHOT flag when requesting the threaded interrupt, which is normaly caught by the sanity checks when force irq threading is disabled. Fix it by skipping the force threading setup when a regular threaded interrupt is requested. As a consequence the interrupt request which lacks the IRQ_ONESHOT flag is rejected correctly instead of silently wreckaging it. Fixes: 2a1d3ab8986d ("genirq: Handle force threading of irqs with primary and thread handler") Reported-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Tested-by: Kurt Kanzenbach Cc: stable@vger.kernel.org --- kernel/irq/manage.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index daeabd791d58..9a8b7ba9aa88 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1068,6 +1068,13 @@ static int irq_setup_forced_threading(struct irqaction *new) if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) return 0; + /* + * No further action required for interrupts which are requested as + * threaded interrupts already + */ + if (new->handler == irq_default_primary_handler) + return 0; + new->flags |= IRQF_ONESHOT; /* @@ -1075,7 +1082,7 @@ static int irq_setup_forced_threading(struct irqaction *new) * thread handler. We force thread them as well by creating a * secondary action. */ - if (new->handler != irq_default_primary_handler && new->thread_fn) { + if (new->handler && new->thread_fn) { /* Allocate the secondary action */ new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!new->secondary) -- cgit From 0a0e0829f990120cef165bbb804237f400953ec2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 Aug 2018 15:31:34 +0200 Subject: nohz: Fix missing tick reprogram when interrupting an inline softirq The full nohz tick is reprogrammed in irq_exit() only if the exit is not in a nesting interrupt. This stands as an optimization: whether a hardirq or a softirq is interrupted, the tick is going to be reprogrammed when necessary at the end of the inner interrupt, with even potential new updates on the timer queue. When soft interrupts are interrupted, it's assumed that they are executing on the tail of an interrupt return. In that case tick_nohz_irq_exit() is called after softirq processing to take care of the tick reprogramming. But the assumption is wrong: softirqs can be processed inline as well, ie: outside of an interrupt, like in a call to local_bh_enable() or from ksoftirqd. Inline softirqs don't reprogram the tick once they are done, as opposed to interrupt tail softirq processing. So if a tick interrupts an inline softirq processing, the next timer will neither be reprogrammed from the interrupting tick's irq_exit() nor after the interrupted softirq processing. This situation may leave the tick unprogrammed while timers are armed. To fix this, simply keep reprogramming the tick even if a softirq has been interrupted. That can be optimized further, but for now correctness is more important. Note that new timers enqueued in nohz_full mode after a softirq gets interrupted will still be handled just fine through self-IPIs triggered by the timer code. Reported-by: Anna-Maria Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Thomas Gleixner Tested-by: Anna-Maria Gleixner Cc: stable@vger.kernel.org # 4.14+ Link: https://lkml.kernel.org/r/1533303094-15855-1-git-send-email-frederic@kernel.org --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 75ffc1d1a2e0..6f584861d329 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -390,7 +390,7 @@ static inline void tick_irq_exit(void) /* Make sure that timer wheel updates are propagated */ if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { - if (!in_interrupt()) + if (!in_irq()) tick_nohz_irq_exit(); } #endif -- cgit From 3757b255bf20ae3c941abae7624ff215bfd9ef05 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:41 +0300 Subject: mlxsw: core_acl_flex_actions: Return error for conflicting actions Spectrum switch ACL action set is built in groups of three actions which may point to additional actions. A group holds a single record which can be set as goto record for pointing at a following group or can be set to mark the termination of the lookup. This is perfectly adequate for handling a series of actions to be executed on a packet. While the SW model allows configuration of conflicting actions where it is clear that some actions will never execute, the mlxsw driver must block such configurations as it creates a conflict over the single terminate/goto record value. For a conflicting actions configuration such as: # tc filter add dev swp49 parent ffff: \ protocol ip pref 10 \ flower skip_sw dst_ip 192.168.101.1 \ action goto chain 100 \ action mirred egress mirror dev swp4 Where it is clear that the last action will never execute, the mlxsw driver was issuing a warning instead of returning an error. Therefore replace that warning with an error for this specific case. Fixes: 4cda7d8d7098 ("mlxsw: core: Introduce flexible actions support") Signed-off-by: Nir Dotan Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/core_acl_flex_actions.c | 42 +++++++++++----------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 3c0d882ba183..ce280680258e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -626,8 +626,8 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, char *oneact; char *actions; - if (WARN_ON(block->finished)) - return NULL; + if (block->finished) + return ERR_PTR(-EINVAL); if (block->cur_act_index + action_size > block->afa->max_acts_per_set) { struct mlxsw_afa_set *set; @@ -637,7 +637,7 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, */ set = mlxsw_afa_set_create(false); if (!set) - return NULL; + return ERR_PTR(-ENOBUFS); set->prev = block->cur_set; block->cur_act_index = 0; block->cur_set->next = set; @@ -724,8 +724,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, MLXSW_AFA_VLAN_CODE, MLXSW_AFA_VLAN_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, MLXSW_AFA_VLAN_CMD_SET_OUTER, vid, MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp, @@ -806,8 +806,8 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, 0); return 0; @@ -820,8 +820,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, trap_id); @@ -836,8 +836,8 @@ int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, trap_id); @@ -908,8 +908,8 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block, char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAPDISC_CODE, MLXSW_AFA_TRAPDISC_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_NOP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, 0); mlxsw_afa_trapdisc_mirror_pack(act, true, mirror_agent); @@ -996,8 +996,8 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE, MLXSW_AFA_FORWARD_SIZE); - if (!act) { - err = -ENOBUFS; + if (IS_ERR(act)) { + err = PTR_ERR(act); goto err_append_action; } mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS, @@ -1052,8 +1052,8 @@ int mlxsw_afa_block_append_allocated_counter(struct mlxsw_afa_block *block, { char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_POLCNT_CODE, MLXSW_AFA_POLCNT_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES, counter_index); return 0; @@ -1123,8 +1123,8 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid) char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_VIRFWD_CODE, MLXSW_AFA_VIRFWD_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid); return 0; } @@ -1193,8 +1193,8 @@ int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_MCROUTER_CODE, MLXSW_AFA_MCROUTER_SIZE); - if (!act) - return -ENOBUFS; + if (IS_ERR(act)) + return PTR_ERR(act); mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, expected_irif, min_mtu, rmid_valid, kvdl_index); return 0; -- cgit From dda0a3a3fb92451d4a922e56365ee1f73c8a9586 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:42 +0300 Subject: mlxsw: core_acl_flex_actions: Remove redundant resource destruction Some ACL actions require the allocation of a separate resource prior to applying the action itself. When facing an error condition during the setup phase of the action, resource should be destroyed. For such actions the destruction was done twice which is dangerous and lead to a potential crash. The destruction took place first upon error on action setup phase and then as the rule was destroyed. The following sequence generated a crash: # tc qdisc add dev swp49 ingress # tc filter add dev swp49 parent ffff: \ protocol ip chain 100 pref 10 \ flower skip_sw dst_ip 192.168.101.1 action drop # tc filter add dev swp49 parent ffff: \ protocol ip pref 10 \ flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \ action mirred egress mirror dev swp4 Therefore add mlxsw_afa_resource_del() as a complement of mlxsw_afa_resource_add() to add symmetry to resource_list membership handling. Call this from mlxsw_afa_fwd_entry_ref_destroy() to make the _fwd_entry_ref_create() and _fwd_entry_ref_destroy() pair of calls a NOP. Fixes: 140ce421217e ("mlxsw: core: Convert fwd_entry_ref list to be generic per-block resource list") Signed-off-by: Nir Dotan Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index ce280680258e..d664cc0289c2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -327,12 +327,16 @@ static void mlxsw_afa_resource_add(struct mlxsw_afa_block *block, list_add(&resource->list, &block->resource_list); } +static void mlxsw_afa_resource_del(struct mlxsw_afa_resource *resource) +{ + list_del(&resource->list); +} + static void mlxsw_afa_resources_destroy(struct mlxsw_afa_block *block) { struct mlxsw_afa_resource *resource, *tmp; list_for_each_entry_safe(resource, tmp, &block->resource_list, list) { - list_del(&resource->list); resource->destructor(block, resource); } } @@ -530,6 +534,7 @@ static void mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) { + mlxsw_afa_resource_del(&fwd_entry_ref->resource); mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); kfree(fwd_entry_ref); } -- cgit From 7cc6169493990dec488eda0a3f6612729ca25e81 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:43 +0300 Subject: mlxsw: core_acl_flex_actions: Remove redundant counter destruction Each tc flower rule uses a hidden count action. As counter resource may not be available due to limited HW resources, update _counter_create() and _counter_destroy() pair to follow previously introduced symmetric error condition handling, add a call to mlxsw_afa_resource_del() as part of the counter resource destruction. Fixes: c18c1e186ba8 ("mlxsw: core: Make counter index allocated inside the action append") Signed-off-by: Nir Dotan Reviewed-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index d664cc0289c2..a54f23f00a5f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -584,6 +584,7 @@ static void mlxsw_afa_counter_destroy(struct mlxsw_afa_block *block, struct mlxsw_afa_counter *counter) { + mlxsw_afa_resource_del(&counter->resource); block->afa->ops->counter_index_put(block->afa->ops_priv, counter->counter_index); kfree(counter); -- cgit From caebd1b389708bf3d0465be829480fc706a68720 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Fri, 3 Aug 2018 15:57:44 +0300 Subject: mlxsw: core_acl_flex_actions: Remove redundant mirror resource destruction In previous patch mlxsw_afa_resource_del() was added to avoid a duplicate resource detruction scenario. For mirror actions, such duplicate destruction leads to a crash as in: # tc qdisc add dev swp49 ingress # tc filter add dev swp49 parent ffff: \ protocol ip chain 100 pref 10 \ flower skip_sw dst_ip 192.168.101.1 action drop # tc filter add dev swp49 parent ffff: \ protocol ip pref 10 \ flower skip_sw dst_ip 192.168.101.1 action goto chain 100 \ action mirred egress mirror dev swp4 Therefore add a call to mlxsw_afa_resource_del() in mlxsw_afa_mirror_destroy() in order to clear that resource from rule's resources. Fixes: d0d13c1858a1 ("mlxsw: spectrum_acl: Add support for mirror action") Signed-off-by: Nir Dotan Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index a54f23f00a5f..f6f6a568d66a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -862,6 +862,7 @@ static void mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block, struct mlxsw_afa_mirror *mirror) { + mlxsw_afa_resource_del(&mirror->resource); block->afa->ops->mirror_del(block->afa->ops_priv, mirror->local_in_port, mirror->span_id, -- cgit From f664e37dcc525768280cb94321424a09beb1c992 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 3 Aug 2018 17:00:11 +0200 Subject: l2tp: fix missing refcount drop in pppol2tp_tunnel_ioctl() If 'session' is not NULL and is not a PPP pseudo-wire, then we fail to drop the reference taken by l2tp_session_get(). Fixes: ecd012e45ab5 ("l2tp: filter out non-PPP sessions in pppol2tp_tunnel_ioctl()") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e398797878a9..cf6cca260e7b 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1201,13 +1201,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, l2tp_session_get(sock_net(sk), tunnel, stats.session_id); - if (session && session->pwtype == L2TP_PWTYPE_PPP) { - err = pppol2tp_session_ioctl(session, cmd, - arg); + if (!session) { + err = -EBADR; + break; + } + if (session->pwtype != L2TP_PWTYPE_PPP) { l2tp_session_dec_refcount(session); - } else { err = -EBADR; + break; } + + err = pppol2tp_session_ioctl(session, cmd, arg); + l2tp_session_dec_refcount(session); break; } #ifdef CONFIG_XFRM -- cgit From 961b33c244e5ba1543ae26270a1ba29f29c2db83 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 3 Aug 2018 12:52:58 -0700 Subject: jfs: Fix usercopy whitelist for inline inode data Bart Massey reported what turned out to be a usercopy whitelist false positive in JFS when symlink contents exceeded 128 bytes. The inline inode data (i_inline) is actually designed to overflow into the "extended area" following it (i_inline_ea) when needed. So the whitelist needed to be expanded to include both i_inline and i_inline_ea (the whole size of which is calculated internally using IDATASIZE, 256, instead of sizeof(i_inline), 128). $ cd /mnt/jfs $ touch $(perl -e 'print "B" x 250') $ ln -s B* b $ ls -l >/dev/null [ 249.436410] Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLUB object 'jfs_ip' (offset 616, size 250)! Reported-by: Bart Massey Fixes: 8d2704d382a9 ("jfs: Define usercopy region in jfs_ip slab cache") Cc: Dave Kleikamp Cc: jfs-discussion@lists.sourceforge.net Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- fs/jfs/jfs_dinode.h | 7 +++++++ fs/jfs/jfs_incore.h | 1 + fs/jfs/super.c | 3 +-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 395c4c0d0f06..1682a87c00b2 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -115,6 +115,13 @@ struct dinode { dxd_t _dxd; /* 16: */ union { __le32 _rdev; /* 4: */ + /* + * The fast symlink area + * is expected to overflow + * into _inlineea when + * needed (which will clear + * INLINEEA). + */ u8 _fastsymlink[128]; } _u; u8 _inlineea[128]; diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 1f26d1910409..9940a1e04cbf 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -87,6 +87,7 @@ struct jfs_inode_info { struct { unchar _unused[16]; /* 16: */ dxd_t _dxd; /* 16: */ + /* _inline may overflow into _inline_ea when needed */ unchar _inline[128]; /* 128: inline symlink */ /* _inline_ea may overlay the last part of * file._xtroot if maxentry = XTROOTINITSLOT diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 1b9264fd54b6..f08571433aba 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -967,8 +967,7 @@ static int __init init_jfs_fs(void) jfs_inode_cachep = kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, - offsetof(struct jfs_inode_info, i_inline), - sizeof_field(struct jfs_inode_info, i_inline), + offsetof(struct jfs_inode_info, i_inline), IDATASIZE, init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; -- cgit From 5607016cd1bbec538050b495669c3c8c5a2cee80 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Fri, 3 Aug 2018 10:38:33 +0200 Subject: net/smc: no cursor update send in state SMC_INIT If a writer blocked condition is received without data, the current consumer cursor is immediately sent. Servers could already receive this condition in state SMC_INIT without finished tx-setup. This patch avoids sending a consumer cursor update in this case. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7e8d63fc8ae..9bde1e4ca288 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -233,7 +233,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, /* force immediate tx of current consumer cursor, but * under send_lock to guarantee arrival in seqno-order */ - smc_tx_sndbuf_nonempty(conn); + if (smc->sk.sk_state != SMC_INIT) + smc_tx_sndbuf_nonempty(conn); } } -- cgit From 91874ecf32e41b5d86a4cb9d60e0bee50d828058 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sun, 5 Aug 2018 01:35:53 +0100 Subject: netlink: Don't shift on 64 for ngroups It's legal to have 64 groups for netlink_sock. As user-supplied nladdr->nl_groups is __u32, it's possible to subscribe only to first 32 groups. The check for correctness of .bind() userspace supplied parameter is done by applying mask made from ngroups shift. Which broke Android as they have 64 groups and the shift for mask resulted in an overflow. Fixes: 61f4b23769f0 ("netlink: Don't shift with UB on nlk->ngroups") Cc: "David S. Miller" Cc: Herbert Xu Cc: Steffen Klassert Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Reported-and-Tested-by: Nathan Chancellor Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c09d16870f74..56704d95f82d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1013,8 +1013,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->ngroups == 0) groups = 0; - else - groups &= (1ULL << nlk->ngroups) - 1; + else if (nlk->ngroups < 8*sizeof(groups)) + groups &= (1UL << nlk->ngroups) - 1; bound = nlk->bound; if (bound) { -- cgit From a32e236eb93e62a0f692e79b7c3c9636689559b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 3 Aug 2018 12:22:09 -0700 Subject: Partially revert "block: fail op_is_write() requests to read-only partitions" It turns out that commit 721c7fc701c7 ("block: fail op_is_write() requests to read-only partitions"), while obviously correct, causes problems for some older lvm2 installations. The reason is that the lvm snapshotting will continue to write to the snapshow COW volume, even after the volume has been marked read-only. End result: snapshot failure. This has actually been fixed in newer version of the lvm2 tool, but the old tools still exist, and the breakage was reported both in the kernel bugzilla and in the Debian bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200439 https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=900442 The lvm2 fix is here https://sourceware.org/git/?p=lvm2.git;a=commit;h=a6fdb9d9d70f51c49ad11a87ab4243344e6701a3 but until everybody has updated to recent versions, we'll have to weaken the "never write to read-only partitions" check. It now allows the write to happen, but causes a warning, something like this: generic_make_request: Trying to write to read-only block-device dm-3 (partno X) Modules linked in: nf_tables xt_cgroup xt_owner kvm_intel iwlmvm kvm irqbypass iwlwifi CPU: 1 PID: 77 Comm: kworker/1:1 Not tainted 4.17.9-gentoo #3 Hardware name: LENOVO 20B6A019RT/20B6A019RT, BIOS GJET91WW (2.41 ) 09/21/2016 Workqueue: ksnaphd do_metadata RIP: 0010:generic_make_request_checks+0x4ac/0x600 ... Call Trace: generic_make_request+0x64/0x400 submit_bio+0x6c/0x140 dispatch_io+0x287/0x430 sync_io+0xc3/0x120 dm_io+0x1f8/0x220 do_metadata+0x1d/0x30 process_one_work+0x1b9/0x3e0 worker_thread+0x2b/0x3c0 kthread+0x113/0x130 ret_from_fork+0x35/0x40 Note that this is a "revert" in behavior only. I'm leaving alone the actual code cleanups in commit 721c7fc701c7, but letting the previously uncaught request go through with a warning instead of stopping it. Fixes: 721c7fc701c7 ("block: fail op_is_write() requests to read-only partitions") Reported-and-tested-by: WGH Acked-by: Mike Snitzer Cc: Sagi Grimberg Cc: Ilya Dryomov Cc: Jens Axboe Cc: Zdenek Kabelac Signed-off-by: Linus Torvalds --- block/blk-core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f84a9b7b6f5a..ee33590f54eb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2155,11 +2155,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) if (part->policy && op_is_write(bio_op(bio))) { char b[BDEVNAME_SIZE]; - printk(KERN_ERR + WARN_ONCE(1, "generic_make_request: Trying to write " "to read-only block-device %s (partno %d)\n", bio_devname(bio, b), part->partno); - return true; + /* Older lvm-tools actually trigger this */ + return false; } return false; -- cgit From 1ffaddd029c867d134a1dde39f540dcc8c52e274 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Aug 2018 12:37:41 -0700 Subject: Linux 4.18-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 85f3481a56d6..7a3c4548162b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Merciless Moray # *DOCUMENTATION* -- cgit