diff options
-rw-r--r-- | Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml | 1 | ||||
-rw-r--r-- | arch/arm/boot/dts/socfpga.dtsi | 2 | ||||
-rw-r--r-- | arch/arm/boot/dts/socfpga_arria10.dtsi | 2 | ||||
-rw-r--r-- | arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 | ||||
-rw-r--r-- | arch/arm64/boot/dts/intel/socfpga_agilex.dtsi | 2 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 8 | ||||
-rw-r--r-- | drivers/i2c/busses/i2c-mpc.c | 15 | ||||
-rw-r--r-- | drivers/i2c/i2c-core-base.c | 95 | ||||
-rw-r--r-- | drivers/md/raid1.c | 3 | ||||
-rw-r--r-- | drivers/power/reset/ltc2952-poweroff.c | 4 | ||||
-rw-r--r-- | drivers/power/supply/bq25890_charger.c | 4 | ||||
-rw-r--r-- | drivers/power/supply/power_supply_core.c | 4 | ||||
-rw-r--r-- | drivers/reset/reset-rzg2l-usbphy-ctrl.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 3 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 19 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 33 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 88 | ||||
-rw-r--r-- | tools/testing/selftests/cgroup/cgroup_util.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/cgroup/test_core.c | 165 |
20 files changed, 304 insertions, 164 deletions
diff --git a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml index ca155abbda7a..037f41f58503 100644 --- a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml +++ b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml @@ -29,6 +29,7 @@ properties: - ti,am654-ospi - intel,lgm-qspi - xlnx,versal-ospi-1.0 + - intel,socfpga-qspi - const: cdns,qspi-nor - const: cdns,qspi-nor diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 0b021eef0b53..7c1d6423d7f8 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -782,7 +782,7 @@ }; qspi: spi@ff705000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff705000 0x1000>, diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index a574ea91d9d3..3ba431dfa8c9 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -756,7 +756,7 @@ }; qspi: spi@ff809000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff809000 0x100>, diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index d301ac0d406b..3ec301bd08a9 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -594,7 +594,7 @@ }; qspi: spi@ff8d2000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff8d2000 0x100>, diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 163f33b46e4f..0dd2d2ee765a 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -628,7 +628,7 @@ }; qspi: spi@ff8d2000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff8d2000 0x100>, diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 83345bfac246..6cf50ee0b77c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -358,6 +358,9 @@ static int i10nm_get_hbm_munits(void) mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); if (!mbase) { + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", base + off); return -ENOMEM; @@ -368,6 +371,12 @@ static int i10nm_get_hbm_munits(void) mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); if (!I10NM_IS_HBM_IMC(mcmtr)) { + iounmap(d->imc[lmc].mbase); + d->imc[lmc].mbase = NULL; + d->imc[lmc].hbm_mc = false; + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); return -ENODEV; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 9afd11ca2709..45977a72b5dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -547,9 +547,6 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) - drm_sched_stop(&ring->sched, NULL); - /* You can't wait for HW to signal if it's gone */ if (!drm_dev_is_unplugged(adev_to_drm(adev))) r = amdgpu_fence_wait_empty(ring); @@ -609,11 +606,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) { - drm_sched_resubmit_jobs(&ring->sched); - drm_sched_start(&ring->sched, true); - } - /* enable the interrupt */ if (ring->fence_drv.irq_src) amdgpu_irq_get(adev, ring->fence_drv.irq_src, diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 53b8da6dbb23..db26cc36e13f 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -492,7 +492,7 @@ static void mpc_i2c_finish(struct mpc_i2c *i2c, int rc) static void mpc_i2c_do_action(struct mpc_i2c *i2c) { - struct i2c_msg *msg = &i2c->msgs[i2c->curr_msg]; + struct i2c_msg *msg = NULL; int dir = 0; int recv_len = 0; u8 byte; @@ -501,10 +501,13 @@ static void mpc_i2c_do_action(struct mpc_i2c *i2c) i2c->cntl_bits &= ~(CCR_RSTA | CCR_MTX | CCR_TXAK); - if (msg->flags & I2C_M_RD) - dir = 1; - if (msg->flags & I2C_M_RECV_LEN) - recv_len = 1; + if (i2c->action != MPC_I2C_ACTION_STOP) { + msg = &i2c->msgs[i2c->curr_msg]; + if (msg->flags & I2C_M_RD) + dir = 1; + if (msg->flags & I2C_M_RECV_LEN) + recv_len = 1; + } switch (i2c->action) { case MPC_I2C_ACTION_RESTART: @@ -581,7 +584,7 @@ static void mpc_i2c_do_action(struct mpc_i2c *i2c) break; } - if (msg->len == i2c->byte_posn) { + if (msg && msg->len == i2c->byte_posn) { i2c->curr_msg++; i2c->byte_posn = 0; diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index f193f9058584..73253e667de1 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -466,14 +466,12 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap; struct i2c_driver *driver; int status; if (!client) return 0; - adap = client->adapter; client->irq = client->init_irq; if (!client->irq) { @@ -539,14 +537,6 @@ static int i2c_device_probe(struct device *dev) dev_dbg(dev, "probe\n"); - if (adap->bus_regulator) { - status = regulator_enable(adap->bus_regulator); - if (status < 0) { - dev_err(&adap->dev, "Failed to enable bus regulator\n"); - goto err_clear_wakeup_irq; - } - } - status = of_clk_set_defaults(dev->of_node, false); if (status < 0) goto err_clear_wakeup_irq; @@ -605,10 +595,8 @@ put_sync_adapter: static void i2c_device_remove(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - struct i2c_adapter *adap; struct i2c_driver *driver; - adap = client->adapter; driver = to_i2c_driver(dev->driver); if (driver->remove) { int status; @@ -623,8 +611,6 @@ static void i2c_device_remove(struct device *dev) devres_release_group(&client->dev, client->devres_group_id); dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); - if (!pm_runtime_status_suspended(&client->dev) && adap->bus_regulator) - regulator_disable(adap->bus_regulator); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -634,86 +620,6 @@ static void i2c_device_remove(struct device *dev) pm_runtime_put(&client->adapter->dev); } -#ifdef CONFIG_PM_SLEEP -static int i2c_resume_early(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - if (pm_runtime_status_suspended(&client->dev) && - client->adapter->bus_regulator) { - err = regulator_enable(client->adapter->bus_regulator); - if (err) - return err; - } - - return pm_generic_resume_early(&client->dev); -} - -static int i2c_suspend_late(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - err = pm_generic_suspend_late(&client->dev); - if (err) - return err; - - if (!pm_runtime_status_suspended(&client->dev) && - client->adapter->bus_regulator) - return regulator_disable(client->adapter->bus_regulator); - - return 0; -} -#endif - -#ifdef CONFIG_PM -static int i2c_runtime_resume(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - if (client->adapter->bus_regulator) { - err = regulator_enable(client->adapter->bus_regulator); - if (err) - return err; - } - - return pm_generic_runtime_resume(&client->dev); -} - -static int i2c_runtime_suspend(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - err = pm_generic_runtime_suspend(&client->dev); - if (err) - return err; - - if (client->adapter->bus_regulator) - return regulator_disable(client->adapter->bus_regulator); - return 0; -} -#endif - -static const struct dev_pm_ops i2c_device_pm = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(i2c_suspend_late, i2c_resume_early) - SET_RUNTIME_PM_OPS(i2c_runtime_suspend, i2c_runtime_resume, NULL) -}; - static void i2c_device_shutdown(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); @@ -773,7 +679,6 @@ struct bus_type i2c_bus_type = { .probe = i2c_device_probe, .remove = i2c_device_remove, .shutdown = i2c_device_shutdown, - .pm = &i2c_device_pm, }; EXPORT_SYMBOL_GPL(i2c_bus_type); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7dc8026cf6ee..85505424f7a4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1496,12 +1496,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!r1_bio->bios[i]) continue; - if (first_clone && test_bit(WriteMostly, &rdev->flags)) { + if (first_clone) { /* do behind I/O ? * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly * is waiting for behind writes to flush */ if (bitmap && + test_bit(WriteMostly, &rdev->flags) && (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && !waitqueue_active(&bitmap->behind_wait)) { diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c index fbb344353fe4..65d9528cc989 100644 --- a/drivers/power/reset/ltc2952-poweroff.c +++ b/drivers/power/reset/ltc2952-poweroff.c @@ -159,8 +159,8 @@ static void ltc2952_poweroff_kill(void) static void ltc2952_poweroff_default(struct ltc2952_poweroff *data) { - data->wde_interval = 300L * 1E6L; - data->trigger_delay = ktime_set(2, 500L*1E6L); + data->wde_interval = 300L * NSEC_PER_MSEC; + data->trigger_delay = ktime_set(2, 500L * NSEC_PER_MSEC); hrtimer_init(&data->timer_trigger, CLOCK_MONOTONIC, HRTIMER_MODE_REL); data->timer_trigger.function = ltc2952_poweroff_timer_trigger; diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 34ec186a2e9a..b7eac5428083 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -581,12 +581,12 @@ static irqreturn_t __bq25890_handle_irq(struct bq25890_device *bq) if (!new_state.online && bq->state.online) { /* power removed */ /* disable ADC */ - ret = bq25890_field_write(bq, F_CONV_START, 0); + ret = bq25890_field_write(bq, F_CONV_RATE, 0); if (ret < 0) goto error; } else if (new_state.online && !bq->state.online) { /* power inserted */ /* enable ADC, to have control of charge current/voltage */ - ret = bq25890_field_write(bq, F_CONV_START, 1); + ret = bq25890_field_write(bq, F_CONV_RATE, 1); if (ret < 0) goto error; } diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index fc12a4f407f4..6093754cebd5 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -853,6 +853,10 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, return NULL; for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { + /* Out of capacity tables */ + if (!info->ocv_table[i]) + break; + temp_diff = abs(info->ocv_temp[i] - temp); if (temp_diff < best_temp_diff) { diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c index e0704fd2b533..1e8315038850 100644 --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c @@ -137,7 +137,12 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev) dev_set_drvdata(dev, priv); pm_runtime_enable(&pdev->dev); - pm_runtime_resume_and_get(&pdev->dev); + error = pm_runtime_resume_and_get(&pdev->dev); + if (error < 0) { + pm_runtime_disable(&pdev->dev); + reset_control_assert(priv->rstc); + return dev_err_probe(&pdev->dev, error, "pm_runtime_resume_and_get failed"); + } /* put pll and phy into reset state */ spin_lock_irqsave(&priv->lock, flags); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 174cd8950cb6..bc85e045845d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -687,7 +687,8 @@ xfs_ioc_space( if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); + bf->l_start - XFS_ISIZE(ip), + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index bfbeabc17a9d..6e36e854b512 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) return container_of(kfc, struct cgroup_fs_context, kfc); } +struct cgroup_pidlist; + +struct cgroup_file_ctx { + struct cgroup_namespace *ns; + + struct { + void *trigger; + } psi; + + struct { + bool started; + struct css_task_iter iter; + } procs; + + struct { + struct cgroup_pidlist *pidlist; + } procs1; +}; + /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 81c9e0685948..41e0837a5a0b 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -394,6 +394,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) * next pid to display, if any */ struct kernfs_open_file *of = s->private; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = seq_css(s)->cgroup; struct cgroup_pidlist *l; enum cgroup_filetype type = seq_cft(s)->private; @@ -403,25 +404,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) mutex_lock(&cgrp->pidlist_mutex); /* - * !NULL @of->priv indicates that this isn't the first start() - * after open. If the matching pidlist is around, we can use that. - * Look for it. Note that @of->priv can't be used directly. It - * could already have been destroyed. + * !NULL @ctx->procs1.pidlist indicates that this isn't the first + * start() after open. If the matching pidlist is around, we can use + * that. Look for it. Note that @ctx->procs1.pidlist can't be used + * directly. It could already have been destroyed. */ - if (of->priv) - of->priv = cgroup_pidlist_find(cgrp, type); + if (ctx->procs1.pidlist) + ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); /* * Either this is the first start() after open or the matching * pidlist has been destroyed inbetween. Create a new one. */ - if (!of->priv) { - ret = pidlist_array_load(cgrp, type, - (struct cgroup_pidlist **)&of->priv); + if (!ctx->procs1.pidlist) { + ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); if (ret) return ERR_PTR(ret); } - l = of->priv; + l = ctx->procs1.pidlist; if (pid) { int end = l->length; @@ -449,7 +449,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) static void cgroup_pidlist_stop(struct seq_file *s, void *v) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; if (l) mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, @@ -460,7 +461,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v) static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; pid_t *p = v; pid_t *end = l->list + l->length; /* @@ -504,10 +506,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, goto out_unlock; /* - * Even if we're attaching all tasks in the thread group, we only - * need to check permissions on one of them. + * Even if we're attaching all tasks in the thread group, we only need + * to check permissions on one of them. Check permissions using the + * credentials from file open to protect against inherited fd attacks. */ - cred = current_cred(); + cred = of->file->f_cred; tcred = get_task_cred(task); if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 919194de39c8..cafb8c114a21 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3630,6 +3630,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, enum psi_res res) { + struct cgroup_file_ctx *ctx = of->priv; struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; @@ -3648,7 +3649,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); + psi_trigger_replace(&ctx->psi.trigger, new); cgroup_put(cgrp); @@ -3679,12 +3680,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, poll_table *pt) { - return psi_trigger_poll(&of->priv, of->file, pt); + struct cgroup_file_ctx *ctx = of->priv; + + return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + struct cgroup_file_ctx *ctx = of->priv; + + psi_trigger_replace(&ctx->psi.trigger, NULL); } bool cgroup_psi_enabled(void) @@ -3811,24 +3816,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, static int cgroup_file_open(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx; + int ret; - if (cft->open) - return cft->open(of); - return 0; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); + if (ret) { + put_cgroup_ns(ctx->ns); + kfree(ctx); + } + return ret; } static void cgroup_file_release(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx = of->priv; if (cft->release) cft->release(of); + put_cgroup_ns(ctx->ns); + kfree(ctx); } static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = of->kn->parent->priv; struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; @@ -3845,7 +3869,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, */ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && !(cft->flags & CFTYPE_NS_DELEGATABLE) && - ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) + ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) return -EPERM; if (cft->write) @@ -4751,21 +4775,21 @@ void css_task_iter_end(struct css_task_iter *it) static void cgroup_procs_release(struct kernfs_open_file *of) { - if (of->priv) { - css_task_iter_end(of->priv); - kfree(of->priv); - } + struct cgroup_file_ctx *ctx = of->priv; + + if (ctx->procs.started) + css_task_iter_end(&ctx->procs.iter); } static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; if (pos) (*pos)++; - return css_task_iter_next(it); + return css_task_iter_next(&ctx->procs.iter); } static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, @@ -4773,21 +4797,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, { struct kernfs_open_file *of = s->private; struct cgroup *cgrp = seq_css(s)->cgroup; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct css_task_iter *it = &ctx->procs.iter; /* * When a seq_file is seeked, it's always traversed sequentially * from position 0, so we can simply keep iterating on !0 *pos. */ - if (!it) { + if (!ctx->procs.started) { if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); - - it = kzalloc(sizeof(*it), GFP_KERNEL); - if (!it) - return ERR_PTR(-ENOMEM); - of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); + ctx->procs.started = true; } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); @@ -4838,9 +4859,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) static int cgroup_procs_write_permission(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb) + struct super_block *sb, + struct cgroup_namespace *ns) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *com_cgrp = src_cgrp; int ret; @@ -4869,11 +4890,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, static int cgroup_attach_permissions(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb, bool threadgroup) + struct super_block *sb, bool threadgroup, + struct cgroup_namespace *ns) { int ret = 0; - ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); if (ret) return ret; @@ -4890,8 +4912,10 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, bool threadgroup) { + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; + const struct cred *saved_cred; ssize_t ret; bool locked; @@ -4909,9 +4933,16 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); spin_unlock_irq(&css_set_lock); - /* process and thread migrations follow same delegation rule */ + /* + * Process and thread migrations follow same delegation rule. Check + * permissions using the credentials from file open to protect against + * inherited fd attacks. + */ + saved_cred = override_creds(of->file->f_cred); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb, threadgroup); + of->file->f_path.dentry->d_sb, + threadgroup, ctx->ns); + revert_creds(saved_cred); if (ret) goto out_finish; @@ -6130,7 +6161,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) goto err; ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, - !(kargs->flags & CLONE_THREAD)); + !(kargs->flags & CLONE_THREAD), + current->nsproxy->cgroup_ns); if (ret) goto err; diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 623cec04ad42..0cf7e90c0052 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len) int cg_create(const char *cgroup) { - return mkdir(cgroup, 0644); + return mkdir(cgroup, 0755); } int cg_wait_for_proc_count(const char *cgroup, int count) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 3df648c37876..600123503063 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -1,11 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE #include <linux/limits.h> +#include <linux/sched.h> #include <sys/types.h> #include <sys/mman.h> #include <sys/wait.h> #include <unistd.h> #include <fcntl.h> +#include <sched.h> #include <stdio.h> #include <errno.h> #include <signal.h> @@ -674,6 +677,166 @@ cleanup: return ret; } +/* + * cgroup migration permission check should be performed based on the + * credentials at the time of open instead of write. + */ +static int test_cgcore_lesser_euid_open(const char *root) +{ + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + uid_t saved_uid; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + saved_uid = geteuid(); + if (seteuid(test_euid)) + goto cleanup; + + cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); + + if (seteuid(saved_uid)) + goto cleanup; + + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + +struct lesser_ns_open_thread_arg { + const char *path; + int fd; + int err; +}; + +static int lesser_ns_open_thread_fn(void *arg) +{ + struct lesser_ns_open_thread_arg *targ = arg; + + targ->fd = open(targ->path, O_RDWR); + targ->err = errno; + return 0; +} + +/* + * cgroup migration permission check should be performed based on the cgroup + * namespace at the time of open instead of write. + */ +static int test_cgcore_lesser_ns_open(const char *root) +{ + static char stack[65536]; + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + struct lesser_ns_open_thread_arg targ = { .fd = -1 }; + pid_t pid; + int status; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_b)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + targ.path = cg_test_b_procs; + pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), + CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, + &targ); + if (pid < 0) + goto cleanup; + + if (waitpid(pid, &status, 0) < 0) + goto cleanup; + + if (!WIFEXITED(status)) + goto cleanup; + + cg_test_b_procs_fd = targ.fd; + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -689,6 +852,8 @@ struct corecg_test { T(test_cgcore_proc_migration), T(test_cgcore_thread_migration), T(test_cgcore_destroy), + T(test_cgcore_lesser_euid_open), + T(test_cgcore_lesser_ns_open), }; #undef T |