summaryrefslogtreecommitdiff
path: root/include/linux/blk-cgroup.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/blk-cgroup.h')
-rw-r--r--include/linux/blk-cgroup.h146
1 files changed, 143 insertions, 3 deletions
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6c666fd7de3c..34aec30e06c7 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -35,6 +35,7 @@ enum blkg_rwstat_type {
BLKG_RWSTAT_WRITE,
BLKG_RWSTAT_SYNC,
BLKG_RWSTAT_ASYNC,
+ BLKG_RWSTAT_DISCARD,
BLKG_RWSTAT_NR,
BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
@@ -136,6 +137,12 @@ struct blkcg_gq {
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
struct rcu_head rcu_head;
+
+ atomic_t use_delay;
+ atomic64_t delay_nsec;
+ atomic64_t delay_start;
+ u64 last_delay;
+ int last_use;
};
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -148,6 +155,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
+typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
+ size_t size);
struct blkcg_policy {
int plid;
@@ -167,6 +176,7 @@ struct blkcg_policy {
blkcg_pol_offline_pd_fn *pd_offline_fn;
blkcg_pol_free_pd_fn *pd_free_fn;
blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
+ blkcg_pol_stat_pd_fn *pd_stat_fn;
};
extern struct blkcg blkcg_root;
@@ -238,6 +248,42 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
return css_to_blkcg(task_css(current, io_cgrp_id));
}
+static inline bool blk_cgroup_congested(void)
+{
+ struct cgroup_subsys_state *css;
+ bool ret = false;
+
+ rcu_read_lock();
+ css = kthread_blkcg();
+ if (!css)
+ css = task_css(current, io_cgrp_id);
+ while (css) {
+ if (atomic_read(&css->cgroup->congestion_count)) {
+ ret = true;
+ break;
+ }
+ css = css->parent;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+ * @return: true if this bio needs to be submitted with the root blkg context.
+ *
+ * In order to avoid priority inversions we sometimes need to issue a bio as if
+ * it were attached to the root blkg, and then backcharge to the actual owning
+ * blkg. The idea is we do bio_blkcg() to look up the actual context for the
+ * bio and attach the appropriate blkg to the bio. Then we call this helper and
+ * if it is true run with the root blkg for that queue and then do any
+ * backcharging to the originating cgroup once the io is complete.
+ */
+static inline bool bio_issue_as_root_blkg(struct bio *bio)
+{
+ return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
+}
+
/**
* blkcg_parent - get the parent of a blkcg
* @blkcg: blkcg of interest
@@ -296,6 +342,17 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
}
/**
+ * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for @q at the root level. See also blkg_lookup().
+ */
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{
+ return q->root_blkg;
+}
+
+/**
* blkg_to_pdata - get policy private data
* @blkg: blkg of interest
* @pol: policy of interest
@@ -355,6 +412,21 @@ static inline void blkg_get(struct blkcg_gq *blkg)
atomic_inc(&blkg->refcnt);
}
+/**
+ * blkg_try_get - try and get a blkg reference
+ * @blkg: blkg to get
+ *
+ * This is for use when doing an RCU lookup of the blkg. We may be in the midst
+ * of freeing this blkg, so we can only use it if the refcnt is not zero.
+ */
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+{
+ if (atomic_inc_not_zero(&blkg->refcnt))
+ return blkg;
+ return NULL;
+}
+
+
void __blkg_release_rcu(struct rcu_head *rcu);
/**
@@ -589,7 +661,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
{
struct percpu_counter *cnt;
- if (op_is_write(op))
+ if (op_is_discard(op))
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
+ else if (op_is_write(op))
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
else
cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
@@ -706,8 +780,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
if (!throtl) {
blkg = blkg ?: q->root_blkg;
- blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
- bio->bi_iter.bi_size);
+ /*
+ * If the bio is flagged with BIO_QUEUE_ENTERED it means this
+ * is a split bio and we would have already accounted for the
+ * size of the bio.
+ */
+ if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
+ blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
+ bio->bi_iter.bi_size);
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
}
@@ -715,6 +795,59 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
return !throtl;
}
+static inline void blkcg_use_delay(struct blkcg_gq *blkg)
+{
+ if (atomic_add_return(1, &blkg->use_delay) == 1)
+ atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
+{
+ int old = atomic_read(&blkg->use_delay);
+
+ if (old == 0)
+ return 0;
+
+ /*
+ * We do this song and dance because we can race with somebody else
+ * adding or removing delay. If we just did an atomic_dec we'd end up
+ * negative and we'd already be in trouble. We need to subtract 1 and
+ * then check to see if we were the last delay so we can drop the
+ * congestion count on the cgroup.
+ */
+ while (old) {
+ int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
+ if (cur == old)
+ break;
+ old = cur;
+ }
+
+ if (old == 0)
+ return 0;
+ if (old == 1)
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+ return 1;
+}
+
+static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+{
+ int old = atomic_read(&blkg->use_delay);
+ if (!old)
+ return;
+ /* We only want 1 person clearing the congestion count for this blkg. */
+ while (old) {
+ int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
+ if (cur == old) {
+ atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+ break;
+ }
+ old = cur;
+ }
+}
+
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_maybe_throttle_current(void);
#else /* CONFIG_BLK_CGROUP */
struct blkcg {
@@ -734,9 +867,16 @@ struct blkcg_policy {
#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
+static inline void blkcg_maybe_throttle_current(void) { }
+static inline bool blk_cgroup_congested(void) { return false; }
+
#ifdef CONFIG_BLOCK
+static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
+
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }