Diffstat (limited to 'block/blk-iocost.c')
-rw-r--r--  block/blk-iocost.c | 293
1 file changed, 192 insertions(+), 101 deletions(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 495396425bad..a0416927d33d 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -111,7 +111,7 @@
* busy signal.
*
* As devices can have deep queues and be unfair in how the queued commands
- * are executed, soley depending on rq wait may not result in satisfactory
+ * are executed, solely depending on rq wait may not result in satisfactory
* control quality. For a better control quality, completion latency QoS
* parameters can be configured so that the device is considered saturated
* if N'th percentile completion latency rises above the set point.
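
For illustration only, a minimal userspace sketch of the latency QoS idea this comment describes — not kernel code, with made-up sample values: treat the device as saturated once the configured percentile of completion latencies crosses the threshold.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int cmp_u64(const void *a, const void *b)
{
	uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;
	return (x > y) - (x < y);
}

/* nonzero when the pct'th percentile of lat exceeds thr */
static int saturated(uint64_t *lat, size_t n, unsigned int pct, uint64_t thr)
{
	qsort(lat, n, sizeof(*lat), cmp_u64);
	return lat[(n - 1) * pct / 100] > thr;
}

int main(void)
{
	uint64_t lat[] = { 120, 140, 150, 180, 900 };  /* completion latencies, usec */

	printf("saturated: %d\n", saturated(lat, 5, 95, 150));
	return 0;
}
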
@@ -232,7 +232,9 @@ enum {
/* 1/64k is granular enough and can easily be handled w/ u32 */
WEIGHT_ONE = 1 << 16,
+};
+enum {
/*
* As vtime is used to calculate the cost of each IO, it needs to
* be fairly high precision. For example, it should be able to
@@ -256,6 +258,11 @@ enum {
VRATE_MIN = VTIME_PER_USEC * VRATE_MIN_PPM / MILLION,
VRATE_CLAMP_ADJ_PCT = 4,
+ /* switch iff the conditions are met for longer than this */
+ AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
+};
+
+enum {
/* if IOs end up waiting for requests, issue less */
RQ_WAIT_BUSY_PCT = 5,
@@ -294,9 +301,6 @@ enum {
/* don't let cmds which take a very long time pin lagging for too long */
MAX_LAGGING_PERIODS = 10,
- /* switch iff the conditions are met for longer than this */
- AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
-
/*
* Count IO size in 4k pages. The 12bit shift helps keeping
* size-proportional components of cost calculation in closer
@@ -556,7 +560,6 @@ struct ioc_now {
u64 now_ns;
u64 now;
u64 vnow;
- u64 vrate;
};
struct iocg_wait {
@@ -645,7 +648,7 @@ static const struct ioc_params autop[] = {
* vrate adjust percentages indexed by ioc->busy_level. We adjust up on
* vtime credit shortage and down on device saturation.
*/
-static u32 vrate_adj_pct[] =
+static const u32 vrate_adj_pct[] =
{ 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -666,7 +669,7 @@ static struct ioc *q_to_ioc(struct request_queue *q)
static const char __maybe_unused *ioc_name(struct ioc *ioc)
{
- struct gendisk *disk = ioc->rqos.q->disk;
+ struct gendisk *disk = ioc->rqos.disk;
if (!disk)
return "<unknown>";
@@ -797,7 +800,11 @@ static void ioc_refresh_period_us(struct ioc *ioc)
ioc_refresh_margins(ioc);
}
-static int ioc_autop_idx(struct ioc *ioc)
+/*
+ * ioc->rqos.disk isn't initialized when this function is called from
+ * the init path.
+ */
+static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
{
int idx = ioc->autop_idx;
const struct ioc_params *p = &autop[idx];
@@ -805,11 +812,11 @@ static int ioc_autop_idx(struct ioc *ioc)
u64 now_ns;
/* rotational? */
- if (!blk_queue_nonrot(ioc->rqos.q))
+ if (!blk_queue_nonrot(disk->queue))
return AUTOP_HDD;
/* handle SATA SSDs w/ broken NCQ */
- if (blk_queue_depth(ioc->rqos.q) == 1)
+ if (blk_queue_depth(disk->queue) == 1)
return AUTOP_SSD_QD1;
/* use one of the normal ssd sets */
@@ -822,7 +829,7 @@ static int ioc_autop_idx(struct ioc *ioc)
/* step up/down based on the vrate */
vrate_pct = div64_u64(ioc->vtime_base_rate * 100, VTIME_PER_USEC);
- now_ns = ktime_get_ns();
+ now_ns = blk_time_get_ns();
if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) {
if (!ioc->autop_too_fast_at)
@@ -865,9 +872,14 @@ static void calc_lcoefs(u64 bps, u64 seqiops, u64 randiops,
*page = *seqio = *randio = 0;
- if (bps)
- *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC,
- DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE));
+ if (bps) {
+ u64 bps_pages = DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE);
+
+ if (bps_pages)
+ *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC, bps_pages);
+ else
+ *page = 1;
+ }
if (seqiops) {
v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, seqiops);
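
The new bps_pages guard exists because DIV_ROUND_UP-style rounding wraps to 0 when bps is within a page of U64_MAX, which would make the following division trap. A standalone userspace sketch (assumed values, not part of the patch) demonstrating the overflow:

#include <stdint.h>
#include <stdio.h>

#define IOC_PAGE_SIZE 4096ULL
/* mirrors the kernel's rounding-up division */
#define DIV_ROUND_UP_ULL(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	uint64_t bps = UINT64_MAX - 100;  /* hostile user-supplied rbps/wbps */

	/* (n + d - 1) wraps past U64_MAX, so the quotient collapses to 0
	 * and VTIME_PER_SEC / bps_pages would divide by zero */
	uint64_t bps_pages = DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE);

	printf("bps_pages = %llu\n", (unsigned long long)bps_pages);
	return 0;
}
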
@@ -893,21 +905,28 @@ static void ioc_refresh_lcoefs(struct ioc *ioc)
&c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]);
}
-static bool ioc_refresh_params(struct ioc *ioc, bool force)
+/*
+ * struct gendisk is required as an argument because ioc->rqos.disk
+ * is not properly initialized when called from the init path.
+ */
+static bool ioc_refresh_params_disk(struct ioc *ioc, bool force,
+ struct gendisk *disk)
{
const struct ioc_params *p;
int idx;
lockdep_assert_held(&ioc->lock);
- idx = ioc_autop_idx(ioc);
+ idx = ioc_autop_idx(ioc, disk);
p = &autop[idx];
if (idx == ioc->autop_idx && !force)
return false;
- if (idx != ioc->autop_idx)
+ if (idx != ioc->autop_idx) {
atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
+ ioc->vtime_base_rate = VTIME_PER_USEC;
+ }
ioc->autop_idx = idx;
ioc->autop_too_fast_at = 0;
@@ -923,12 +942,17 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
ioc->vrate_min = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MIN] *
VTIME_PER_USEC, MILLION);
- ioc->vrate_max = div64_u64((u64)ioc->params.qos[QOS_MAX] *
- VTIME_PER_USEC, MILLION);
+ ioc->vrate_max = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MAX] *
+ VTIME_PER_USEC, MILLION);
return true;
}
+static bool ioc_refresh_params(struct ioc *ioc, bool force)
+{
+ return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk);
+}
+
/*
* When an iocg accumulates too much vtime or gets deactivated, we throw away
* some vtime, which lowers the overall device utilization. As the exact amount
@@ -975,7 +999,7 @@ static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct,
if (!ioc->busy_level || (ioc->busy_level < 0 && nr_lagging)) {
if (ioc->busy_level != prev_busy_level || nr_lagging)
- trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
+ trace_iocost_ioc_vrate_adj(ioc, vrate,
missed_ppm, rq_wait_pct,
nr_lagging, nr_shortages);
@@ -1018,10 +1042,11 @@ static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct,
static void ioc_now(struct ioc *ioc, struct ioc_now *now)
{
unsigned seq;
+ u64 vrate;
- now->now_ns = ktime_get();
+ now->now_ns = blk_time_get_ns();
now->now = ktime_to_us(now->now_ns);
- now->vrate = atomic64_read(&ioc->vtime_rate);
+ vrate = atomic64_read(&ioc->vtime_rate);
/*
* The current vtime is
@@ -1034,7 +1059,7 @@ static void ioc_now(struct ioc *ioc, struct ioc_now *now)
do {
seq = read_seqcount_begin(&ioc->period_seqcount);
now->vnow = ioc->period_at_vtime +
- (now->now - ioc->period_at) * now->vrate;
+ (now->now - ioc->period_at) * vrate;
} while (read_seqcount_retry(&ioc->period_seqcount, seq));
}
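
Dropping vrate from struct ioc_now means the rate is read once into a local and only used here. A toy userspace sketch of the extrapolation itself, with illustrative numbers and the seqcount retry loop omitted:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t period_at_vtime = 1000000;  /* vtime at last period start */
	uint64_t period_at       = 500;      /* wallclock usec at last period start */
	uint64_t now             = 750;      /* current wallclock usec */
	uint64_t vrate           = 37;       /* vtime units per usec, read once */

	/* same expression as the hunk above, minus the retry loop */
	uint64_t vnow = period_at_vtime + (now - period_at) * vrate;

	printf("vnow = %llu\n", (unsigned long long)vnow);
	return 0;
}
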
@@ -1073,7 +1098,14 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
iocg->child_active_sum);
} else {
- inuse = clamp_t(u32, inuse, 1, active);
+ /*
+ * It may be tempting to turn this into a clamp expression with
+ * a lower limit of 1 but active may be 0, which cannot be used
+ * as an upper limit in that situation. This expression allows
+ * active to clamp inuse unless it is 0, in which case inuse
+ * becomes 1.
+ */
+ inuse = min(inuse, active) ?: 1;
}
iocg->last_inuse = iocg->inuse;
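
A quick userspace demonstration (GNU C, illustrative values) of why the comment above rejects the clamp form: with active == 0 the clamp's lower bound exceeds its upper bound and the result collapses to 0, while min(inuse, active) ?: 1 degrades to 1.

#include <stdint.h>
#include <stdio.h>

static uint32_t max_u32(uint32_t a, uint32_t b) { return a > b ? a : b; }
static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }

int main(void)
{
	uint32_t inuse = 37, active = 0;

	/* clamp_t(u32, inuse, 1, active) expands to this when lo > hi */
	uint32_t clamped = min_u32(max_u32(inuse, 1), active);
	/* the patch's form: active caps inuse unless it is 0 */
	uint32_t picked  = min_u32(inuse, active) ?: 1;

	printf("clamp-style: %u, min ?: 1: %u\n", clamped, picked);
	return 0;
}
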
@@ -1236,7 +1268,7 @@ static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now)
static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
- u64 last_period, cur_period;
+ u64 __maybe_unused last_period, cur_period;
u64 vtime, vtarget;
int i;
@@ -1322,16 +1354,24 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
- u64 tdelta, delay, new_delay;
+ u64 tdelta, delay, new_delay, shift;
s64 vover, vover_pct;
u32 hwa;
lockdep_assert_held(&iocg->waitq.lock);
+ /*
+ * If the delay is set by another CPU, we may be in the past. No need to
+ * change anything if so. This avoids decay calculation underflow.
+ */
+ if (time_before64(now->now, iocg->delay_at))
+ return false;
+
/* calculate the current delay in effect - 1/2 every second */
tdelta = now->now - iocg->delay_at;
- if (iocg->delay)
- delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC);
+ shift = div64_u64(tdelta, USEC_PER_SEC);
+ if (iocg->delay && shift < BITS_PER_LONG)
+ delay = iocg->delay >> shift;
else
delay = 0;
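
A standalone sketch (userspace C, assumed constants) of the capped decay: shifting a 64-bit value by BITS_PER_LONG or more is undefined behaviour rather than a guaranteed 0, so the shift count derived from the elapsed time must be bounded. The time_before64() check above likewise keeps tdelta from underflowing when delay_at was pushed into the future by another CPU.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG 64
#define USEC_PER_SEC  1000000ULL

/* caller must ensure now >= delay_at so tdelta_us cannot underflow */
static uint64_t decay_delay(uint64_t delay, uint64_t tdelta_us)
{
	uint64_t shift = tdelta_us / USEC_PER_SEC;   /* one halving per second */

	if (!delay || shift >= BITS_PER_LONG)
		return 0;                            /* fully decayed */
	return delay >> shift;
}

int main(void)
{
	/* 1 MiB of delay, 3 seconds elapsed -> halved three times */
	printf("%llu\n", (unsigned long long)decay_delay(1ULL << 20, 3 * USEC_PER_SEC));
	return 0;
}
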
@@ -1406,8 +1446,11 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
lockdep_assert_held(&iocg->ioc->lock);
lockdep_assert_held(&iocg->waitq.lock);
- /* make sure that nobody messed with @iocg */
- WARN_ON_ONCE(list_empty(&iocg->active_list));
+ /*
+ * make sure that nobody messed with @iocg. Check iocg->pd.online
+ * to avoid warn when removing blkcg or disk.
+ */
+ WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online);
WARN_ON_ONCE(iocg->inuse > 1);
iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt);
@@ -2040,7 +2083,7 @@ static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors,
struct ioc_now *now)
{
struct ioc_gq *iocg;
- u64 dur, usage_pct, nr_cycles;
+ u64 dur, usage_pct, nr_cycles, nr_cycles_shift;
/* if no debtor, reset the cycle */
if (!nr_debtors) {
@@ -2102,10 +2145,12 @@ static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors,
old_debt = iocg->abs_vdebt;
old_delay = iocg->delay;
+ nr_cycles_shift = min_t(u64, nr_cycles, BITS_PER_LONG - 1);
if (iocg->abs_vdebt)
- iocg->abs_vdebt = iocg->abs_vdebt >> nr_cycles ?: 1;
+ iocg->abs_vdebt = iocg->abs_vdebt >> nr_cycles_shift ?: 1;
+
if (iocg->delay)
- iocg->delay = iocg->delay >> nr_cycles ?: 1;
+ iocg->delay = iocg->delay >> nr_cycles_shift ?: 1;
iocg_kick_waitq(iocg, true, now);
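
The same shift-width concern applies here. A small userspace sketch (assumed, GNU C for the ?: form) of the clamped halving, with the floor of 1 that keeps a still-indebted iocg from reading as debt-free:

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG 64

/* halve debt nr_cycles times, keeping the shift defined and leaving
 * at least 1 behind so the iocg still reads as indebted */
static uint64_t forgive(uint64_t debt, uint64_t nr_cycles)
{
	uint64_t shift = nr_cycles < BITS_PER_LONG - 1 ? nr_cycles
						       : BITS_PER_LONG - 1;

	if (!debt)
		return 0;
	return (debt >> shift) ?: 1;   /* GNU ?: keeps the floor of 1 */
}

int main(void)
{
	printf("%llu %llu\n",
	       (unsigned long long)forgive(4096, 3),    /* 512 */
	       (unsigned long long)forgive(4096, 200)); /* clamped shift -> 1 */
	return 0;
}
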
@@ -2203,8 +2248,8 @@ static void ioc_timer_fn(struct timer_list *timer)
LIST_HEAD(surpluses);
int nr_debtors, nr_shortages = 0, nr_lagging = 0;
u64 usage_us_sum = 0;
- u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM];
- u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
+ u32 ppm_rthr;
+ u32 ppm_wthr;
u32 missed_ppm[2], rq_wait_pct;
u64 period_vtime;
int prev_busy_level;
@@ -2215,6 +2260,8 @@ static void ioc_timer_fn(struct timer_list *timer)
/* take care of active iocgs */
spin_lock_irq(&ioc->lock);
+ ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM];
+ ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
ioc_now(ioc, &now);
period_vtime = now.vnow - ioc->period_at_vtime;
@@ -2287,10 +2334,8 @@ static void ioc_timer_fn(struct timer_list *timer)
else
usage_dur = max_t(u64, now.now - ioc->period_at, 1);
- usage = clamp_t(u32,
- DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE,
- usage_dur),
- 1, WEIGHT_ONE);
+ usage = clamp(DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, usage_dur),
+ 1, WEIGHT_ONE);
/*
* Already donating or accumulated enough to start.
@@ -2428,6 +2473,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
u32 hwi, adj_step;
s64 margin;
u64 cost, new_inuse;
+ unsigned long flags;
current_hweight(iocg, NULL, &hwi);
old_hwi = hwi;
@@ -2446,11 +2492,11 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
iocg->inuse == iocg->active)
return cost;
- spin_lock_irq(&ioc->lock);
+ spin_lock_irqsave(&ioc->lock, flags);
/* we own inuse only when @iocg is in the normal active state */
if (iocg->abs_vdebt || list_empty(&iocg->active_list)) {
- spin_unlock_irq(&ioc->lock);
+ spin_unlock_irqrestore(&ioc->lock, flags);
return cost;
}
@@ -2471,7 +2517,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
} while (time_after64(vtime + cost, now->vnow) &&
iocg->inuse != iocg->active);
- spin_unlock_irq(&ioc->lock);
+ spin_unlock_irqrestore(&ioc->lock, flags);
TRACE_IOCG_PATH(inuse_adjust, iocg, now,
old_inuse, iocg->inuse, old_hwi, hwi);
@@ -2488,6 +2534,10 @@ static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
u64 seek_pages = 0;
u64 cost = 0;
+ /* Can't calculate cost for empty bio */
+ if (!bio->bi_iter.bi_size)
+ goto out;
+
switch (bio_op(bio)) {
case REQ_OP_READ:
coef_seqio = ioc->params.lcoefs[LCOEF_RSEQIO];
@@ -2636,7 +2686,7 @@ retry_lock:
if (use_debt) {
iocg_incur_debt(iocg, abs_cost, &now);
if (iocg_kick_delay(iocg, &now))
- blkcg_schedule_throttle(rqos->q->disk,
+ blkcg_schedule_throttle(rqos->disk,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
iocg_unlock(iocg, ioc_locked, &flags);
return;
@@ -2666,8 +2716,7 @@ retry_lock:
* All waiters are on iocg->waitq and the wait states are
* synchronized using waitq.lock.
*/
- init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
- wait.wait.private = current;
+ init_wait_func(&wait.wait, iocg_wake_fn);
wait.bio = bio;
wait.abs_cost = abs_cost;
wait.committed = false; /* will be set true by waker */
@@ -2737,7 +2786,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
if (likely(!list_empty(&iocg->active_list))) {
iocg_incur_debt(iocg, abs_cost, &now);
if (iocg_kick_delay(iocg, &now))
- blkcg_schedule_throttle(rqos->q->disk,
+ blkcg_schedule_throttle(rqos->disk,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
} else {
iocg_commit_bio(iocg, bio, abs_cost, cost);
@@ -2778,7 +2827,7 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
return;
}
- on_q_ns = ktime_get_ns() - rq->alloc_time_ns;
+ on_q_ns = blk_time_get_ns() - rq->alloc_time_ns;
rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
@@ -2808,18 +2857,18 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
{
struct ioc *ioc = rqos_to_ioc(rqos);
- blkcg_deactivate_policy(rqos->q, &blkcg_policy_iocost);
+ blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iocost);
spin_lock_irq(&ioc->lock);
ioc->running = IOC_STOP;
spin_unlock_irq(&ioc->lock);
- del_timer_sync(&ioc->timer);
+ timer_shutdown_sync(&ioc->timer);
free_percpu(ioc->pcpu_stat);
kfree(ioc);
}
-static struct rq_qos_ops ioc_rqos_ops = {
+static const struct rq_qos_ops ioc_rqos_ops = {
.throttle = ioc_rqos_throttle,
.merge = ioc_rqos_merge,
.done_bio = ioc_rqos_done_bio,
@@ -2830,9 +2879,7 @@ static struct rq_qos_ops ioc_rqos_ops = {
static int blk_iocost_init(struct gendisk *disk)
{
- struct request_queue *q = disk->queue;
struct ioc *ioc;
- struct rq_qos *rqos;
int i, cpu, ret;
ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
@@ -2855,11 +2902,6 @@ static int blk_iocost_init(struct gendisk *disk)
local64_set(&ccs->rq_wait_ns, 0);
}
- rqos = &ioc->rqos;
- rqos->id = RQ_QOS_COST;
- rqos->ops = &ioc_rqos_ops;
- rqos->q = q;
-
spin_lock_init(&ioc->lock);
timer_setup(&ioc->timer, ioc_timer_fn, 0);
INIT_LIST_HEAD(&ioc->active_iocgs);
@@ -2868,32 +2910,32 @@ static int blk_iocost_init(struct gendisk *disk)
ioc->vtime_base_rate = VTIME_PER_USEC;
atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
- ioc->period_at = ktime_to_us(ktime_get());
+ ioc->period_at = ktime_to_us(blk_time_get());
atomic64_set(&ioc->cur_period, 0);
atomic_set(&ioc->hweight_gen, 0);
spin_lock_irq(&ioc->lock);
ioc->autop_idx = AUTOP_INVALID;
- ioc_refresh_params(ioc, true);
+ ioc_refresh_params_disk(ioc, true, disk);
spin_unlock_irq(&ioc->lock);
/*
- * rqos must be added before activation to allow iocg_pd_init() to
+ * rqos must be added before activation to allow ioc_pd_init() to
* lookup the ioc from q. This means that the rqos methods may get
* called before policy activation completion, can't assume that the
* target bio has an iocg associated and need to test for NULL iocg.
*/
- ret = rq_qos_add(q, rqos);
+ ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
if (ret)
goto err_free_ioc;
- ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
+ ret = blkcg_activate_policy(disk, &blkcg_policy_iocost);
if (ret)
goto err_del_qos;
return 0;
err_del_qos:
- rq_qos_del(q, rqos);
+ rq_qos_del(&ioc->rqos);
err_free_ioc:
free_percpu(ioc->pcpu_stat);
kfree(ioc);
@@ -2917,13 +2959,14 @@ static void ioc_cpd_free(struct blkcg_policy_data *cpd)
kfree(container_of(cpd, struct ioc_cgrp, cpd));
}
-static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q,
- struct blkcg *blkcg)
+static struct blkg_policy_data *ioc_pd_alloc(struct gendisk *disk,
+ struct blkcg *blkcg, gfp_t gfp)
{
int levels = blkcg->css.cgroup->level + 1;
struct ioc_gq *iocg;
- iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node);
+ iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp,
+ disk->node_id);
if (!iocg)
return NULL;
@@ -2958,8 +3001,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
iocg->hweight_inuse = WEIGHT_ONE;
init_waitqueue_head(&iocg->waitq);
- hrtimer_init(&iocg->waitq_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- iocg->waitq_timer.function = iocg_waitq_timer_fn;
+ hrtimer_setup(&iocg->waitq_timer, iocg_waitq_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
iocg->level = blkg->blkcg->css.cgroup->level;
@@ -3085,9 +3127,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
return nbytes;
}
- ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
+ blkg_conf_init(&ctx, buf);
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
if (ret)
- return ret;
+ goto err;
iocg = blkg_to_iocg(ctx.blkg);
@@ -3106,12 +3150,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
weight_updated(iocg, &now);
spin_unlock(&iocg->ioc->lock);
- blkg_conf_finish(&ctx);
+ blkg_conf_exit(&ctx);
return nbytes;
einval:
- blkg_conf_finish(&ctx);
- return -EINVAL;
+ ret = -EINVAL;
+err:
+ blkg_conf_exit(&ctx);
+ return ret;
}
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
@@ -3123,6 +3169,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
if (!dname)
return 0;
+ spin_lock(&ioc->lock);
seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
ioc->params.qos[QOS_RPPM] / 10000,
@@ -3135,6 +3182,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
ioc->params.qos[QOS_MIN] % 10000 / 100,
ioc->params.qos[QOS_MAX] / 10000,
ioc->params.qos[QOS_MAX] % 10000 / 100);
+ spin_unlock(&ioc->lock);
return 0;
}
@@ -3166,19 +3214,30 @@ static const match_table_t qos_tokens = {
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
- struct block_device *bdev;
+ struct blkg_conf_ctx ctx;
struct gendisk *disk;
struct ioc *ioc;
u32 qos[NR_QOS_PARAMS];
bool enable, user;
- char *p;
+ char *body, *p;
+ unsigned long memflags;
int ret;
- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ blkg_conf_init(&ctx, input);
+
+ memflags = blkg_conf_open_bdev_frozen(&ctx);
+ if (IS_ERR_VALUE(memflags)) {
+ ret = memflags;
+ goto err;
+ }
+
+ body = ctx.body;
+ disk = ctx.bdev->bd_disk;
+ if (!queue_is_mq(disk->queue)) {
+ ret = -EOPNOTSUPP;
+ goto err;
+ }
- disk = bdev->bd_disk;
ioc = q_to_ioc(disk->queue);
if (!ioc) {
ret = blk_iocost_init(disk);
@@ -3187,13 +3246,14 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ioc = q_to_ioc(disk->queue);
}
+ blk_mq_quiesce_queue(disk->queue);
+
spin_lock_irq(&ioc->lock);
memcpy(qos, ioc->params.qos, sizeof(qos));
enable = ioc->enabled;
user = ioc->user_qos_params;
- spin_unlock_irq(&ioc->lock);
- while ((p = strsep(&input, " \t\n"))) {
+ while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@@ -3204,7 +3264,8 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
switch (match_token(p, qos_ctrl_tokens, args)) {
case QOS_ENABLE:
- match_u64(&args[0], &v);
+ if (match_u64(&args[0], &v))
+ goto einval;
enable = v;
continue;
case QOS_CTRL:
@@ -3258,13 +3319,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
if (qos[QOS_MIN] > qos[QOS_MAX])
goto einval;
- spin_lock_irq(&ioc->lock);
-
- if (enable) {
+ if (enable && !ioc->enabled) {
blk_stat_enable_accounting(disk->queue);
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = true;
- } else {
+ } else if (!enable && ioc->enabled) {
+ blk_stat_disable_accounting(disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = false;
}
@@ -3279,12 +3339,21 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ioc_refresh_params(ioc, true);
spin_unlock_irq(&ioc->lock);
- blkdev_put_no_open(bdev);
+ if (enable)
+ wbt_disable_default(disk);
+ else
+ wbt_enable_default(disk);
+
+ blk_mq_unquiesce_queue(disk->queue);
+
+ blkg_conf_exit_frozen(&ctx, memflags);
return nbytes;
einval:
+ spin_unlock_irq(&ioc->lock);
+ blk_mq_unquiesce_queue(disk->queue);
ret = -EINVAL;
err:
- blkdev_put_no_open(bdev);
+ blkg_conf_exit_frozen(&ctx, memflags);
return ret;
}
@@ -3298,12 +3367,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
if (!dname)
return 0;
+ spin_lock(&ioc->lock);
seq_printf(sf, "%s ctrl=%s model=linear "
"rbps=%llu rseqiops=%llu rrandiops=%llu "
"wbps=%llu wseqiops=%llu wrandiops=%llu\n",
dname, ioc->user_cost_model ? "user" : "auto",
u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
+ spin_unlock(&ioc->lock);
return 0;
}
@@ -3335,31 +3406,44 @@ static const match_table_t i_lcoef_tokens = {
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
- struct block_device *bdev;
+ struct blkg_conf_ctx ctx;
+ struct request_queue *q;
+ unsigned int memflags;
struct ioc *ioc;
u64 u[NR_I_LCOEFS];
bool user;
- char *p;
+ char *body, *p;
int ret;
- bdev = blkcg_conf_open_bdev(&input);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
+ blkg_conf_init(&ctx, input);
+
+ ret = blkg_conf_open_bdev(&ctx);
+ if (ret)
+ goto err;
- ioc = q_to_ioc(bdev_get_queue(bdev));
+ body = ctx.body;
+ q = bdev_get_queue(ctx.bdev);
+ if (!queue_is_mq(q)) {
+ ret = -EOPNOTSUPP;
+ goto err;
+ }
+
+ ioc = q_to_ioc(q);
if (!ioc) {
- ret = blk_iocost_init(bdev->bd_disk);
+ ret = blk_iocost_init(ctx.bdev->bd_disk);
if (ret)
goto err;
- ioc = q_to_ioc(bdev_get_queue(bdev));
+ ioc = q_to_ioc(q);
}
+ memflags = blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
+
spin_lock_irq(&ioc->lock);
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
user = ioc->user_cost_model;
- spin_unlock_irq(&ioc->lock);
- while ((p = strsep(&input, " \t\n"))) {
+ while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
@@ -3394,7 +3478,6 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
user = true;
}
- spin_lock_irq(&ioc->lock);
if (user) {
memcpy(ioc->params.i_lcoefs, u, sizeof(u));
ioc->user_cost_model = true;
@@ -3404,13 +3487,21 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
ioc_refresh_params(ioc, true);
spin_unlock_irq(&ioc->lock);
- blkdev_put_no_open(bdev);
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
+
+ blkg_conf_exit(&ctx);
return nbytes;
einval:
+ spin_unlock_irq(&ioc->lock);
+
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q, memflags);
+
ret = -EINVAL;
err:
- blkdev_put_no_open(bdev);
+ blkg_conf_exit(&ctx);
return ret;
}