author     Linus Torvalds <torvalds@linux-foundation.org>  2015-06-25 19:00:17 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-06-25 19:00:17 -0400
commit     e4bc13adfd016fc1036838170288b5680d1a98b0 (patch)
tree       8d2cb749397749439732f3a827cb7f2336408337 /block
parent     ad90fb97515b732bc27a0109baa10af636c3c8cd (diff)
parent     3e1534cf4a2a8278e811e7c84a79da1a02347b8b (diff)
Merge branch 'for-4.2/writeback' of git://git.kernel.dk/linux-block
Pull cgroup writeback support from Jens Axboe:
"This is the big pull request for adding cgroup writeback support.
This code has been in development for a long time, and it has been
simmering in for-next for a good chunk of this cycle too. This is one
of those problems that has been talked about for at least half a
decade; finally there's a solution and code to go with it.
Also see last week's writeup on LWN:
http://lwn.net/Articles/648292/"
* 'for-4.2/writeback' of git://git.kernel.dk/linux-block: (85 commits)
writeback, blkio: add documentation for cgroup writeback support
vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB
writeback: do foreign inode detection iff cgroup writeback is enabled
v9fs: fix error handling in v9fs_session_init()
bdi: fix wrong error return value in cgwb_create()
buffer: remove unusued 'ret' variable
writeback: disassociate inodes from dying bdi_writebacks
writeback: implement foreign cgroup inode bdi_writeback switching
writeback: add lockdep annotation to inode_to_wb()
writeback: use unlocked_inode_to_wb transaction in inode_congested()
writeback: implement unlocked_inode_to_wb transaction and use it for stat updates
writeback: implement [locked_]inode_to_wb_and_lock_list()
writeback: implement foreign cgroup inode detection
writeback: make writeback_control track the inode being written back
writeback: relocate wb[_try]_get(), wb_put(), inode_{attach|detach}_wb()
mm: vmscan: disable memcg direct reclaim stalling if cgroup writeback support is in use
writeback: implement memcg writeback domain based throttling
writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain size changes
writeback: implement memcg wb_domain
writeback: update wb_over_bg_thresh() to use wb_domain aware operations
...
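As a concrete illustration of what the opt-in looks like on the filesystem side, the "vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB" commit listed above has each filesystem flag its superblock as capable of cgroup-aware writeback. The following is a hedged sketch, not code from this merge: everything named "examplefs" is hypothetical; only the SB_I_CGROUPWB flag and the s_iflags superblock field are assumed to be real.

#include <linux/fs.h>

/*
 * Hedged sketch: a filesystem opts into cgroup-aware writeback by setting
 * SB_I_CGROUPWB on its superblock during mount.  "examplefs" is hypothetical.
 */
static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
{
        sb->s_iflags |= SB_I_CGROUPWB;  /* dirty pages written back per cgroup */

        /* ... the rest of the usual fill_super() setup would follow ... */
        return 0;
}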
Diffstat (limited to 'block')
-rw-r--r--  block/bio.c            |  35
-rw-r--r--  block/blk-cgroup.c     | 123
-rw-r--r--  block/blk-cgroup.h     | 627
-rw-r--r--  block/blk-core.c       |  70
-rw-r--r--  block/blk-integrity.c  |   1
-rw-r--r--  block/blk-sysfs.c      |   3
-rw-r--r--  block/blk-throttle.c   |   2
-rw-r--r--  block/bounce.c         |   1
-rw-r--r--  block/cfq-iosched.c    |   2
-rw-r--r--  block/elevator.c       |   2
-rw-r--r--  block/genhd.c          |   1
11 files changed, 139 insertions(+), 728 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 259197d97de1..2a00d349cd68 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1988,6 +1988,28 @@ struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_
1988 | EXPORT_SYMBOL(bioset_create_nobvec); | 1988 | EXPORT_SYMBOL(bioset_create_nobvec); |
1989 | 1989 | ||
1990 | #ifdef CONFIG_BLK_CGROUP | 1990 | #ifdef CONFIG_BLK_CGROUP |
1991 | |||
1992 | /** | ||
1993 | * bio_associate_blkcg - associate a bio with the specified blkcg | ||
1994 | * @bio: target bio | ||
1995 | * @blkcg_css: css of the blkcg to associate | ||
1996 | * | ||
1997 | * Associate @bio with the blkcg specified by @blkcg_css. Block layer will | ||
1998 | * treat @bio as if it were issued by a task which belongs to the blkcg. | ||
1999 | * | ||
2000 | * This function takes an extra reference of @blkcg_css which will be put | ||
2001 | * when @bio is released. The caller must own @bio and is responsible for | ||
2002 | * synchronizing calls to this function. | ||
2003 | */ | ||
2004 | int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) | ||
2005 | { | ||
2006 | if (unlikely(bio->bi_css)) | ||
2007 | return -EBUSY; | ||
2008 | css_get(blkcg_css); | ||
2009 | bio->bi_css = blkcg_css; | ||
2010 | return 0; | ||
2011 | } | ||
2012 | |||
1991 | /** | 2013 | /** |
1992 | * bio_associate_current - associate a bio with %current | 2014 | * bio_associate_current - associate a bio with %current |
1993 | * @bio: target bio | 2015 | * @bio: target bio |
@@ -2004,26 +2026,17 @@ EXPORT_SYMBOL(bioset_create_nobvec);
2004 | int bio_associate_current(struct bio *bio) | 2026 | int bio_associate_current(struct bio *bio) |
2005 | { | 2027 | { |
2006 | struct io_context *ioc; | 2028 | struct io_context *ioc; |
2007 | struct cgroup_subsys_state *css; | ||
2008 | 2029 | ||
2009 | if (bio->bi_ioc) | 2030 | if (bio->bi_css) |
2010 | return -EBUSY; | 2031 | return -EBUSY; |
2011 | 2032 | ||
2012 | ioc = current->io_context; | 2033 | ioc = current->io_context; |
2013 | if (!ioc) | 2034 | if (!ioc) |
2014 | return -ENOENT; | 2035 | return -ENOENT; |
2015 | 2036 | ||
2016 | /* acquire active ref on @ioc and associate */ | ||
2017 | get_io_context_active(ioc); | 2037 | get_io_context_active(ioc); |
2018 | bio->bi_ioc = ioc; | 2038 | bio->bi_ioc = ioc; |
2019 | 2039 | bio->bi_css = task_get_css(current, blkio_cgrp_id); | |
2020 | /* associate blkcg if exists */ | ||
2021 | rcu_read_lock(); | ||
2022 | css = task_css(current, blkio_cgrp_id); | ||
2023 | if (css && css_tryget_online(css)) | ||
2024 | bio->bi_css = css; | ||
2025 | rcu_read_unlock(); | ||
2026 | |||
2027 | return 0; | 2040 | return 0; |
2028 | } | 2041 | } |
2029 | 2042 | ||
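The kernel-doc added above for bio_associate_blkcg() spells out the contract: the caller must own the bio, the function takes an extra reference on the css, and that reference is put when the bio is released. Below is a hedged sketch of a caller that charges I/O to an explicit blkcg rather than to %current; example_submit_for_blkcg() is hypothetical, while bio_associate_blkcg() (available only under CONFIG_BLK_CGROUP, per the hunk above) and the long-standing submit_bio() are real interfaces.

#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/cgroup.h>

/*
 * Hedged sketch: tag @bio with an explicit blkcg css before submission so
 * the block layer accounts it to that cgroup instead of %current's.
 * bio_associate_blkcg() pins an extra css reference, dropped when the bio
 * is released; it returns -EBUSY if the bio already carries a css.
 */
static void example_submit_for_blkcg(struct bio *bio,
                                     struct cgroup_subsys_state *blkcg_css)
{
        if (bio_associate_blkcg(bio, blkcg_css))
                pr_warn("bio already associated with a blkcg\n");

        submit_bio(WRITE, bio);
}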
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 6e43fa355e71..9f97da52d006 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -19,11 +19,12 @@
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/err.h> | 20 | #include <linux/err.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/backing-dev.h> | ||
22 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
23 | #include <linux/genhd.h> | 24 | #include <linux/genhd.h> |
24 | #include <linux/delay.h> | 25 | #include <linux/delay.h> |
25 | #include <linux/atomic.h> | 26 | #include <linux/atomic.h> |
26 | #include "blk-cgroup.h" | 27 | #include <linux/blk-cgroup.h> |
27 | #include "blk.h" | 28 | #include "blk.h" |
28 | 29 | ||
29 | #define MAX_KEY_LEN 100 | 30 | #define MAX_KEY_LEN 100 |
@@ -33,6 +34,8 @@ static DEFINE_MUTEX(blkcg_pol_mutex);
33 | struct blkcg blkcg_root; | 34 | struct blkcg blkcg_root; |
34 | EXPORT_SYMBOL_GPL(blkcg_root); | 35 | EXPORT_SYMBOL_GPL(blkcg_root); |
35 | 36 | ||
37 | struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css; | ||
38 | |||
36 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | 39 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; |
37 | 40 | ||
38 | static bool blkcg_policy_enabled(struct request_queue *q, | 41 | static bool blkcg_policy_enabled(struct request_queue *q, |
@@ -182,6 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
182 | struct blkcg_gq *new_blkg) | 185 | struct blkcg_gq *new_blkg) |
183 | { | 186 | { |
184 | struct blkcg_gq *blkg; | 187 | struct blkcg_gq *blkg; |
188 | struct bdi_writeback_congested *wb_congested; | ||
185 | int i, ret; | 189 | int i, ret; |
186 | 190 | ||
187 | WARN_ON_ONCE(!rcu_read_lock_held()); | 191 | WARN_ON_ONCE(!rcu_read_lock_held()); |
@@ -193,22 +197,30 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
193 | goto err_free_blkg; | 197 | goto err_free_blkg; |
194 | } | 198 | } |
195 | 199 | ||
200 | wb_congested = wb_congested_get_create(&q->backing_dev_info, | ||
201 | blkcg->css.id, GFP_ATOMIC); | ||
202 | if (!wb_congested) { | ||
203 | ret = -ENOMEM; | ||
204 | goto err_put_css; | ||
205 | } | ||
206 | |||
196 | /* allocate */ | 207 | /* allocate */ |
197 | if (!new_blkg) { | 208 | if (!new_blkg) { |
198 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); | 209 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); |
199 | if (unlikely(!new_blkg)) { | 210 | if (unlikely(!new_blkg)) { |
200 | ret = -ENOMEM; | 211 | ret = -ENOMEM; |
201 | goto err_put_css; | 212 | goto err_put_congested; |
202 | } | 213 | } |
203 | } | 214 | } |
204 | blkg = new_blkg; | 215 | blkg = new_blkg; |
216 | blkg->wb_congested = wb_congested; | ||
205 | 217 | ||
206 | /* link parent */ | 218 | /* link parent */ |
207 | if (blkcg_parent(blkcg)) { | 219 | if (blkcg_parent(blkcg)) { |
208 | blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); | 220 | blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); |
209 | if (WARN_ON_ONCE(!blkg->parent)) { | 221 | if (WARN_ON_ONCE(!blkg->parent)) { |
210 | ret = -EINVAL; | 222 | ret = -EINVAL; |
211 | goto err_put_css; | 223 | goto err_put_congested; |
212 | } | 224 | } |
213 | blkg_get(blkg->parent); | 225 | blkg_get(blkg->parent); |
214 | } | 226 | } |
@@ -238,18 +250,15 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
238 | blkg->online = true; | 250 | blkg->online = true; |
239 | spin_unlock(&blkcg->lock); | 251 | spin_unlock(&blkcg->lock); |
240 | 252 | ||
241 | if (!ret) { | 253 | if (!ret) |
242 | if (blkcg == &blkcg_root) { | ||
243 | q->root_blkg = blkg; | ||
244 | q->root_rl.blkg = blkg; | ||
245 | } | ||
246 | return blkg; | 254 | return blkg; |
247 | } | ||
248 | 255 | ||
249 | /* @blkg failed fully initialized, use the usual release path */ | 256 | /* @blkg failed fully initialized, use the usual release path */ |
250 | blkg_put(blkg); | 257 | blkg_put(blkg); |
251 | return ERR_PTR(ret); | 258 | return ERR_PTR(ret); |
252 | 259 | ||
260 | err_put_congested: | ||
261 | wb_congested_put(wb_congested); | ||
253 | err_put_css: | 262 | err_put_css: |
254 | css_put(&blkcg->css); | 263 | css_put(&blkcg->css); |
255 | err_free_blkg: | 264 | err_free_blkg: |
@@ -343,15 +352,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
343 | rcu_assign_pointer(blkcg->blkg_hint, NULL); | 352 | rcu_assign_pointer(blkcg->blkg_hint, NULL); |
344 | 353 | ||
345 | /* | 354 | /* |
346 | * If root blkg is destroyed. Just clear the pointer since root_rl | ||
347 | * does not take reference on root blkg. | ||
348 | */ | ||
349 | if (blkcg == &blkcg_root) { | ||
350 | blkg->q->root_blkg = NULL; | ||
351 | blkg->q->root_rl.blkg = NULL; | ||
352 | } | ||
353 | |||
354 | /* | ||
355 | * Put the reference taken at the time of creation so that when all | 355 | * Put the reference taken at the time of creation so that when all |
356 | * queues are gone, group can be destroyed. | 356 | * queues are gone, group can be destroyed. |
357 | */ | 357 | */ |
@@ -405,6 +405,8 @@ void __blkg_release_rcu(struct rcu_head *rcu_head)
405 | if (blkg->parent) | 405 | if (blkg->parent) |
406 | blkg_put(blkg->parent); | 406 | blkg_put(blkg->parent); |
407 | 407 | ||
408 | wb_congested_put(blkg->wb_congested); | ||
409 | |||
408 | blkg_free(blkg); | 410 | blkg_free(blkg); |
409 | } | 411 | } |
410 | EXPORT_SYMBOL_GPL(__blkg_release_rcu); | 412 | EXPORT_SYMBOL_GPL(__blkg_release_rcu); |
@@ -812,6 +814,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
812 | } | 814 | } |
813 | 815 | ||
814 | spin_unlock_irq(&blkcg->lock); | 816 | spin_unlock_irq(&blkcg->lock); |
817 | |||
818 | wb_blkcg_offline(blkcg); | ||
815 | } | 819 | } |
816 | 820 | ||
817 | static void blkcg_css_free(struct cgroup_subsys_state *css) | 821 | static void blkcg_css_free(struct cgroup_subsys_state *css) |
@@ -868,7 +872,9 @@ done:
868 | spin_lock_init(&blkcg->lock); | 872 | spin_lock_init(&blkcg->lock); |
869 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); | 873 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); |
870 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 874 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
871 | 875 | #ifdef CONFIG_CGROUP_WRITEBACK | |
876 | INIT_LIST_HEAD(&blkcg->cgwb_list); | ||
877 | #endif | ||
872 | return &blkcg->css; | 878 | return &blkcg->css; |
873 | 879 | ||
874 | free_pd_blkcg: | 880 | free_pd_blkcg: |
@@ -892,9 +898,45 @@ free_blkcg:
892 | */ | 898 | */ |
893 | int blkcg_init_queue(struct request_queue *q) | 899 | int blkcg_init_queue(struct request_queue *q) |
894 | { | 900 | { |
895 | might_sleep(); | 901 | struct blkcg_gq *new_blkg, *blkg; |
902 | bool preloaded; | ||
903 | int ret; | ||
904 | |||
905 | new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | ||
906 | if (!new_blkg) | ||
907 | return -ENOMEM; | ||
908 | |||
909 | preloaded = !radix_tree_preload(GFP_KERNEL); | ||
910 | |||
911 | /* | ||
912 | * Make sure the root blkg exists and count the existing blkgs. As | ||
913 | * @q is bypassing at this point, blkg_lookup_create() can't be | ||
914 | * used. Open code insertion. | ||
915 | */ | ||
916 | rcu_read_lock(); | ||
917 | spin_lock_irq(q->queue_lock); | ||
918 | blkg = blkg_create(&blkcg_root, q, new_blkg); | ||
919 | spin_unlock_irq(q->queue_lock); | ||
920 | rcu_read_unlock(); | ||
921 | |||
922 | if (preloaded) | ||
923 | radix_tree_preload_end(); | ||
924 | |||
925 | if (IS_ERR(blkg)) { | ||
926 | kfree(new_blkg); | ||
927 | return PTR_ERR(blkg); | ||
928 | } | ||
896 | 929 | ||
897 | return blk_throtl_init(q); | 930 | q->root_blkg = blkg; |
931 | q->root_rl.blkg = blkg; | ||
932 | |||
933 | ret = blk_throtl_init(q); | ||
934 | if (ret) { | ||
935 | spin_lock_irq(q->queue_lock); | ||
936 | blkg_destroy_all(q); | ||
937 | spin_unlock_irq(q->queue_lock); | ||
938 | } | ||
939 | return ret; | ||
898 | } | 940 | } |
899 | 941 | ||
900 | /** | 942 | /** |
@@ -996,50 +1038,19 @@ int blkcg_activate_policy(struct request_queue *q,
996 | { | 1038 | { |
997 | LIST_HEAD(pds); | 1039 | LIST_HEAD(pds); |
998 | LIST_HEAD(cpds); | 1040 | LIST_HEAD(cpds); |
999 | struct blkcg_gq *blkg, *new_blkg; | 1041 | struct blkcg_gq *blkg; |
1000 | struct blkg_policy_data *pd, *nd; | 1042 | struct blkg_policy_data *pd, *nd; |
1001 | struct blkcg_policy_data *cpd, *cnd; | 1043 | struct blkcg_policy_data *cpd, *cnd; |
1002 | int cnt = 0, ret; | 1044 | int cnt = 0, ret; |
1003 | bool preloaded; | ||
1004 | 1045 | ||
1005 | if (blkcg_policy_enabled(q, pol)) | 1046 | if (blkcg_policy_enabled(q, pol)) |
1006 | return 0; | 1047 | return 0; |
1007 | 1048 | ||
1008 | /* preallocations for root blkg */ | 1049 | /* count and allocate policy_data for all existing blkgs */ |
1009 | new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | ||
1010 | if (!new_blkg) | ||
1011 | return -ENOMEM; | ||
1012 | |||
1013 | blk_queue_bypass_start(q); | 1050 | blk_queue_bypass_start(q); |
1014 | |||
1015 | preloaded = !radix_tree_preload(GFP_KERNEL); | ||
1016 | |||
1017 | /* | ||
1018 | * Make sure the root blkg exists and count the existing blkgs. As | ||
1019 | * @q is bypassing at this point, blkg_lookup_create() can't be | ||
1020 | * used. Open code it. | ||
1021 | */ | ||
1022 | spin_lock_irq(q->queue_lock); | 1051 | spin_lock_irq(q->queue_lock); |
1023 | |||
1024 | rcu_read_lock(); | ||
1025 | blkg = __blkg_lookup(&blkcg_root, q, false); | ||
1026 | if (blkg) | ||
1027 | blkg_free(new_blkg); | ||
1028 | else | ||
1029 | blkg = blkg_create(&blkcg_root, q, new_blkg); | ||
1030 | rcu_read_unlock(); | ||
1031 | |||
1032 | if (preloaded) | ||
1033 | radix_tree_preload_end(); | ||
1034 | |||
1035 | if (IS_ERR(blkg)) { | ||
1036 | ret = PTR_ERR(blkg); | ||
1037 | goto out_unlock; | ||
1038 | } | ||
1039 | |||
1040 | list_for_each_entry(blkg, &q->blkg_list, q_node) | 1052 | list_for_each_entry(blkg, &q->blkg_list, q_node) |
1041 | cnt++; | 1053 | cnt++; |
1042 | |||
1043 | spin_unlock_irq(q->queue_lock); | 1054 | spin_unlock_irq(q->queue_lock); |
1044 | 1055 | ||
1045 | /* | 1056 | /* |
@@ -1140,10 +1151,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
1140 | 1151 | ||
1141 | __clear_bit(pol->plid, q->blkcg_pols); | 1152 | __clear_bit(pol->plid, q->blkcg_pols); |
1142 | 1153 | ||
1143 | /* if no policy is left, no need for blkgs - shoot them down */ | ||
1144 | if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS)) | ||
1145 | blkg_destroy_all(q); | ||
1146 | |||
1147 | list_for_each_entry(blkg, &q->blkg_list, q_node) { | 1154 | list_for_each_entry(blkg, &q->blkg_list, q_node) { |
1148 | /* grab blkcg lock too while removing @pd from @blkg */ | 1155 | /* grab blkcg lock too while removing @pd from @blkg */ |
1149 | spin_lock(&blkg->blkcg->lock); | 1156 | spin_lock(&blkg->blkcg->lock); |
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
deleted file mode 100644
index 74296a78bba1..000000000000
--- a/block/blk-cgroup.h
+++ /dev/null
@@ -1,627 +0,0 @@
1 | #ifndef _BLK_CGROUP_H | ||
2 | #define _BLK_CGROUP_H | ||
3 | /* | ||
4 | * Common Block IO controller cgroup interface | ||
5 | * | ||
6 | * Based on ideas and code from CFQ, CFS and BFQ: | ||
7 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | ||
8 | * | ||
9 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> | ||
10 | * Paolo Valente <paolo.valente@unimore.it> | ||
11 | * | ||
12 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | ||
13 | * Nauman Rafique <nauman@google.com> | ||
14 | */ | ||
15 | |||
16 | #include <linux/cgroup.h> | ||
17 | #include <linux/u64_stats_sync.h> | ||
18 | #include <linux/seq_file.h> | ||
19 | #include <linux/radix-tree.h> | ||
20 | #include <linux/blkdev.h> | ||
21 | #include <linux/atomic.h> | ||
22 | |||
23 | /* Max limits for throttle policy */ | ||
24 | #define THROTL_IOPS_MAX UINT_MAX | ||
25 | |||
26 | #ifdef CONFIG_BLK_CGROUP | ||
27 | |||
28 | enum blkg_rwstat_type { | ||
29 | BLKG_RWSTAT_READ, | ||
30 | BLKG_RWSTAT_WRITE, | ||
31 | BLKG_RWSTAT_SYNC, | ||
32 | BLKG_RWSTAT_ASYNC, | ||
33 | |||
34 | BLKG_RWSTAT_NR, | ||
35 | BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, | ||
36 | }; | ||
37 | |||
38 | struct blkcg_gq; | ||
39 | |||
40 | struct blkcg { | ||
41 | struct cgroup_subsys_state css; | ||
42 | spinlock_t lock; | ||
43 | |||
44 | struct radix_tree_root blkg_tree; | ||
45 | struct blkcg_gq *blkg_hint; | ||
46 | struct hlist_head blkg_list; | ||
47 | |||
48 | struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; | ||
49 | }; | ||
50 | |||
51 | struct blkg_stat { | ||
52 | struct u64_stats_sync syncp; | ||
53 | uint64_t cnt; | ||
54 | }; | ||
55 | |||
56 | struct blkg_rwstat { | ||
57 | struct u64_stats_sync syncp; | ||
58 | uint64_t cnt[BLKG_RWSTAT_NR]; | ||
59 | }; | ||
60 | |||
61 | /* | ||
62 | * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a | ||
63 | * request_queue (q). This is used by blkcg policies which need to track | ||
64 | * information per blkcg - q pair. | ||
65 | * | ||
66 | * There can be multiple active blkcg policies and each has its private | ||
67 | * data on each blkg, the size of which is determined by | ||
68 | * blkcg_policy->pd_size. blkcg core allocates and frees such areas | ||
69 | * together with blkg and invokes pd_init/exit_fn() methods. | ||
70 | * | ||
71 | * Such private data must embed struct blkg_policy_data (pd) at the | ||
72 | * beginning and pd_size can't be smaller than pd. | ||
73 | */ | ||
74 | struct blkg_policy_data { | ||
75 | /* the blkg and policy id this per-policy data belongs to */ | ||
76 | struct blkcg_gq *blkg; | ||
77 | int plid; | ||
78 | |||
79 | /* used during policy activation */ | ||
80 | struct list_head alloc_node; | ||
81 | }; | ||
82 | |||
83 | /* | ||
84 | * Policies that need to keep per-blkcg data which is independent | ||
85 | * from any request_queue associated to it must specify its size | ||
86 | * with the cpd_size field of the blkcg_policy structure and | ||
87 | * embed a blkcg_policy_data in it. blkcg core allocates | ||
88 | * policy-specific per-blkcg structures lazily the first time | ||
89 | * they are actually needed, so it handles them together with | ||
90 | * blkgs. cpd_init() is invoked to let each policy handle | ||
91 | * per-blkcg data. | ||
92 | */ | ||
93 | struct blkcg_policy_data { | ||
94 | /* the policy id this per-policy data belongs to */ | ||
95 | int plid; | ||
96 | |||
97 | /* used during policy activation */ | ||
98 | struct list_head alloc_node; | ||
99 | }; | ||
100 | |||
101 | /* association between a blk cgroup and a request queue */ | ||
102 | struct blkcg_gq { | ||
103 | /* Pointer to the associated request_queue */ | ||
104 | struct request_queue *q; | ||
105 | struct list_head q_node; | ||
106 | struct hlist_node blkcg_node; | ||
107 | struct blkcg *blkcg; | ||
108 | |||
109 | /* all non-root blkcg_gq's are guaranteed to have access to parent */ | ||
110 | struct blkcg_gq *parent; | ||
111 | |||
112 | /* request allocation list for this blkcg-q pair */ | ||
113 | struct request_list rl; | ||
114 | |||
115 | /* reference count */ | ||
116 | atomic_t refcnt; | ||
117 | |||
118 | /* is this blkg online? protected by both blkcg and q locks */ | ||
119 | bool online; | ||
120 | |||
121 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; | ||
122 | |||
123 | struct rcu_head rcu_head; | ||
124 | }; | ||
125 | |||
126 | typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); | ||
127 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); | ||
128 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); | ||
129 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); | ||
130 | typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); | ||
131 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); | ||
132 | |||
133 | struct blkcg_policy { | ||
134 | int plid; | ||
135 | /* policy specific private data size */ | ||
136 | size_t pd_size; | ||
137 | /* policy specific per-blkcg data size */ | ||
138 | size_t cpd_size; | ||
139 | /* cgroup files for the policy */ | ||
140 | struct cftype *cftypes; | ||
141 | |||
142 | /* operations */ | ||
143 | blkcg_pol_init_cpd_fn *cpd_init_fn; | ||
144 | blkcg_pol_init_pd_fn *pd_init_fn; | ||
145 | blkcg_pol_online_pd_fn *pd_online_fn; | ||
146 | blkcg_pol_offline_pd_fn *pd_offline_fn; | ||
147 | blkcg_pol_exit_pd_fn *pd_exit_fn; | ||
148 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; | ||
149 | }; | ||
150 | |||
151 | extern struct blkcg blkcg_root; | ||
152 | |||
153 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); | ||
154 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | ||
155 | struct request_queue *q); | ||
156 | int blkcg_init_queue(struct request_queue *q); | ||
157 | void blkcg_drain_queue(struct request_queue *q); | ||
158 | void blkcg_exit_queue(struct request_queue *q); | ||
159 | |||
160 | /* Blkio controller policy registration */ | ||
161 | int blkcg_policy_register(struct blkcg_policy *pol); | ||
162 | void blkcg_policy_unregister(struct blkcg_policy *pol); | ||
163 | int blkcg_activate_policy(struct request_queue *q, | ||
164 | const struct blkcg_policy *pol); | ||
165 | void blkcg_deactivate_policy(struct request_queue *q, | ||
166 | const struct blkcg_policy *pol); | ||
167 | |||
168 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, | ||
169 | u64 (*prfill)(struct seq_file *, | ||
170 | struct blkg_policy_data *, int), | ||
171 | const struct blkcg_policy *pol, int data, | ||
172 | bool show_total); | ||
173 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); | ||
174 | u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | ||
175 | const struct blkg_rwstat *rwstat); | ||
176 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); | ||
177 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | ||
178 | int off); | ||
179 | |||
180 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); | ||
181 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, | ||
182 | int off); | ||
183 | |||
184 | struct blkg_conf_ctx { | ||
185 | struct gendisk *disk; | ||
186 | struct blkcg_gq *blkg; | ||
187 | u64 v; | ||
188 | }; | ||
189 | |||
190 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | ||
191 | const char *input, struct blkg_conf_ctx *ctx); | ||
192 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); | ||
193 | |||
194 | |||
195 | static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) | ||
196 | { | ||
197 | return css ? container_of(css, struct blkcg, css) : NULL; | ||
198 | } | ||
199 | |||
200 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) | ||
201 | { | ||
202 | return css_to_blkcg(task_css(tsk, blkio_cgrp_id)); | ||
203 | } | ||
204 | |||
205 | static inline struct blkcg *bio_blkcg(struct bio *bio) | ||
206 | { | ||
207 | if (bio && bio->bi_css) | ||
208 | return css_to_blkcg(bio->bi_css); | ||
209 | return task_blkcg(current); | ||
210 | } | ||
211 | |||
212 | /** | ||
213 | * blkcg_parent - get the parent of a blkcg | ||
214 | * @blkcg: blkcg of interest | ||
215 | * | ||
216 | * Return the parent blkcg of @blkcg. Can be called anytime. | ||
217 | */ | ||
218 | static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) | ||
219 | { | ||
220 | return css_to_blkcg(blkcg->css.parent); | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * blkg_to_pdata - get policy private data | ||
225 | * @blkg: blkg of interest | ||
226 | * @pol: policy of interest | ||
227 | * | ||
228 | * Return pointer to private data associated with the @blkg-@pol pair. | ||
229 | */ | ||
230 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | ||
231 | struct blkcg_policy *pol) | ||
232 | { | ||
233 | return blkg ? blkg->pd[pol->plid] : NULL; | ||
234 | } | ||
235 | |||
236 | static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, | ||
237 | struct blkcg_policy *pol) | ||
238 | { | ||
239 | return blkcg ? blkcg->pd[pol->plid] : NULL; | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * pdata_to_blkg - get blkg associated with policy private data | ||
244 | * @pd: policy private data of interest | ||
245 | * | ||
246 | * @pd is policy private data. Determine the blkg it's associated with. | ||
247 | */ | ||
248 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) | ||
249 | { | ||
250 | return pd ? pd->blkg : NULL; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * blkg_path - format cgroup path of blkg | ||
255 | * @blkg: blkg of interest | ||
256 | * @buf: target buffer | ||
257 | * @buflen: target buffer length | ||
258 | * | ||
259 | * Format the path of the cgroup of @blkg into @buf. | ||
260 | */ | ||
261 | static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) | ||
262 | { | ||
263 | char *p; | ||
264 | |||
265 | p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); | ||
266 | if (!p) { | ||
267 | strncpy(buf, "<unavailable>", buflen); | ||
268 | return -ENAMETOOLONG; | ||
269 | } | ||
270 | |||
271 | memmove(buf, p, buf + buflen - p); | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | /** | ||
276 | * blkg_get - get a blkg reference | ||
277 | * @blkg: blkg to get | ||
278 | * | ||
279 | * The caller should be holding an existing reference. | ||
280 | */ | ||
281 | static inline void blkg_get(struct blkcg_gq *blkg) | ||
282 | { | ||
283 | WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); | ||
284 | atomic_inc(&blkg->refcnt); | ||
285 | } | ||
286 | |||
287 | void __blkg_release_rcu(struct rcu_head *rcu); | ||
288 | |||
289 | /** | ||
290 | * blkg_put - put a blkg reference | ||
291 | * @blkg: blkg to put | ||
292 | */ | ||
293 | static inline void blkg_put(struct blkcg_gq *blkg) | ||
294 | { | ||
295 | WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); | ||
296 | if (atomic_dec_and_test(&blkg->refcnt)) | ||
297 | call_rcu(&blkg->rcu_head, __blkg_release_rcu); | ||
298 | } | ||
299 | |||
300 | struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, | ||
301 | bool update_hint); | ||
302 | |||
303 | /** | ||
304 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants | ||
305 | * @d_blkg: loop cursor pointing to the current descendant | ||
306 | * @pos_css: used for iteration | ||
307 | * @p_blkg: target blkg to walk descendants of | ||
308 | * | ||
309 | * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU | ||
310 | * read locked. If called under either blkcg or queue lock, the iteration | ||
311 | * is guaranteed to include all and only online blkgs. The caller may | ||
312 | * update @pos_css by calling css_rightmost_descendant() to skip subtree. | ||
313 | * @p_blkg is included in the iteration and the first node to be visited. | ||
314 | */ | ||
315 | #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ | ||
316 | css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ | ||
317 | if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ | ||
318 | (p_blkg)->q, false))) | ||
319 | |||
320 | /** | ||
321 | * blkg_for_each_descendant_post - post-order walk of a blkg's descendants | ||
322 | * @d_blkg: loop cursor pointing to the current descendant | ||
323 | * @pos_css: used for iteration | ||
324 | * @p_blkg: target blkg to walk descendants of | ||
325 | * | ||
326 | * Similar to blkg_for_each_descendant_pre() but performs post-order | ||
327 | * traversal instead. Synchronization rules are the same. @p_blkg is | ||
328 | * included in the iteration and the last node to be visited. | ||
329 | */ | ||
330 | #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ | ||
331 | css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ | ||
332 | if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ | ||
333 | (p_blkg)->q, false))) | ||
334 | |||
335 | /** | ||
336 | * blk_get_rl - get request_list to use | ||
337 | * @q: request_queue of interest | ||
338 | * @bio: bio which will be attached to the allocated request (may be %NULL) | ||
339 | * | ||
340 | * The caller wants to allocate a request from @q to use for @bio. Find | ||
341 | * the request_list to use and obtain a reference on it. Should be called | ||
342 | * under queue_lock. This function is guaranteed to return non-%NULL | ||
343 | * request_list. | ||
344 | */ | ||
345 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
346 | struct bio *bio) | ||
347 | { | ||
348 | struct blkcg *blkcg; | ||
349 | struct blkcg_gq *blkg; | ||
350 | |||
351 | rcu_read_lock(); | ||
352 | |||
353 | blkcg = bio_blkcg(bio); | ||
354 | |||
355 | /* bypass blkg lookup and use @q->root_rl directly for root */ | ||
356 | if (blkcg == &blkcg_root) | ||
357 | goto root_rl; | ||
358 | |||
359 | /* | ||
360 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | ||
361 | * or if either the blkcg or queue is going away. Fall back to | ||
362 | * root_rl in such cases. | ||
363 | */ | ||
364 | blkg = blkg_lookup_create(blkcg, q); | ||
365 | if (unlikely(IS_ERR(blkg))) | ||
366 | goto root_rl; | ||
367 | |||
368 | blkg_get(blkg); | ||
369 | rcu_read_unlock(); | ||
370 | return &blkg->rl; | ||
371 | root_rl: | ||
372 | rcu_read_unlock(); | ||
373 | return &q->root_rl; | ||
374 | } | ||
375 | |||
376 | /** | ||
377 | * blk_put_rl - put request_list | ||
378 | * @rl: request_list to put | ||
379 | * | ||
380 | * Put the reference acquired by blk_get_rl(). Should be called under | ||
381 | * queue_lock. | ||
382 | */ | ||
383 | static inline void blk_put_rl(struct request_list *rl) | ||
384 | { | ||
385 | /* root_rl may not have blkg set */ | ||
386 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | ||
387 | blkg_put(rl->blkg); | ||
388 | } | ||
389 | |||
390 | /** | ||
391 | * blk_rq_set_rl - associate a request with a request_list | ||
392 | * @rq: request of interest | ||
393 | * @rl: target request_list | ||
394 | * | ||
395 | * Associate @rq with @rl so that accounting and freeing can know the | ||
396 | * request_list @rq came from. | ||
397 | */ | ||
398 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) | ||
399 | { | ||
400 | rq->rl = rl; | ||
401 | } | ||
402 | |||
403 | /** | ||
404 | * blk_rq_rl - return the request_list a request came from | ||
405 | * @rq: request of interest | ||
406 | * | ||
407 | * Return the request_list @rq is allocated from. | ||
408 | */ | ||
409 | static inline struct request_list *blk_rq_rl(struct request *rq) | ||
410 | { | ||
411 | return rq->rl; | ||
412 | } | ||
413 | |||
414 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
415 | struct request_queue *q); | ||
416 | /** | ||
417 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue | ||
418 | * | ||
419 | * Should be used under queue_lock. | ||
420 | */ | ||
421 | #define blk_queue_for_each_rl(rl, q) \ | ||
422 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | ||
423 | |||
424 | static inline void blkg_stat_init(struct blkg_stat *stat) | ||
425 | { | ||
426 | u64_stats_init(&stat->syncp); | ||
427 | } | ||
428 | |||
429 | /** | ||
430 | * blkg_stat_add - add a value to a blkg_stat | ||
431 | * @stat: target blkg_stat | ||
432 | * @val: value to add | ||
433 | * | ||
434 | * Add @val to @stat. The caller is responsible for synchronizing calls to | ||
435 | * this function. | ||
436 | */ | ||
437 | static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) | ||
438 | { | ||
439 | u64_stats_update_begin(&stat->syncp); | ||
440 | stat->cnt += val; | ||
441 | u64_stats_update_end(&stat->syncp); | ||
442 | } | ||
443 | |||
444 | /** | ||
445 | * blkg_stat_read - read the current value of a blkg_stat | ||
446 | * @stat: blkg_stat to read | ||
447 | * | ||
448 | * Read the current value of @stat. This function can be called without | ||
449 | * synchroniztion and takes care of u64 atomicity. | ||
450 | */ | ||
451 | static inline uint64_t blkg_stat_read(struct blkg_stat *stat) | ||
452 | { | ||
453 | unsigned int start; | ||
454 | uint64_t v; | ||
455 | |||
456 | do { | ||
457 | start = u64_stats_fetch_begin_irq(&stat->syncp); | ||
458 | v = stat->cnt; | ||
459 | } while (u64_stats_fetch_retry_irq(&stat->syncp, start)); | ||
460 | |||
461 | return v; | ||
462 | } | ||
463 | |||
464 | /** | ||
465 | * blkg_stat_reset - reset a blkg_stat | ||
466 | * @stat: blkg_stat to reset | ||
467 | */ | ||
468 | static inline void blkg_stat_reset(struct blkg_stat *stat) | ||
469 | { | ||
470 | stat->cnt = 0; | ||
471 | } | ||
472 | |||
473 | /** | ||
474 | * blkg_stat_merge - merge a blkg_stat into another | ||
475 | * @to: the destination blkg_stat | ||
476 | * @from: the source | ||
477 | * | ||
478 | * Add @from's count to @to. | ||
479 | */ | ||
480 | static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) | ||
481 | { | ||
482 | blkg_stat_add(to, blkg_stat_read(from)); | ||
483 | } | ||
484 | |||
485 | static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) | ||
486 | { | ||
487 | u64_stats_init(&rwstat->syncp); | ||
488 | } | ||
489 | |||
490 | /** | ||
491 | * blkg_rwstat_add - add a value to a blkg_rwstat | ||
492 | * @rwstat: target blkg_rwstat | ||
493 | * @rw: mask of REQ_{WRITE|SYNC} | ||
494 | * @val: value to add | ||
495 | * | ||
496 | * Add @val to @rwstat. The counters are chosen according to @rw. The | ||
497 | * caller is responsible for synchronizing calls to this function. | ||
498 | */ | ||
499 | static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, | ||
500 | int rw, uint64_t val) | ||
501 | { | ||
502 | u64_stats_update_begin(&rwstat->syncp); | ||
503 | |||
504 | if (rw & REQ_WRITE) | ||
505 | rwstat->cnt[BLKG_RWSTAT_WRITE] += val; | ||
506 | else | ||
507 | rwstat->cnt[BLKG_RWSTAT_READ] += val; | ||
508 | if (rw & REQ_SYNC) | ||
509 | rwstat->cnt[BLKG_RWSTAT_SYNC] += val; | ||
510 | else | ||
511 | rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; | ||
512 | |||
513 | u64_stats_update_end(&rwstat->syncp); | ||
514 | } | ||
515 | |||
516 | /** | ||
517 | * blkg_rwstat_read - read the current values of a blkg_rwstat | ||
518 | * @rwstat: blkg_rwstat to read | ||
519 | * | ||
520 | * Read the current snapshot of @rwstat and return it as the return value. | ||
521 | * This function can be called without synchronization and takes care of | ||
522 | * u64 atomicity. | ||
523 | */ | ||
524 | static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) | ||
525 | { | ||
526 | unsigned int start; | ||
527 | struct blkg_rwstat tmp; | ||
528 | |||
529 | do { | ||
530 | start = u64_stats_fetch_begin_irq(&rwstat->syncp); | ||
531 | tmp = *rwstat; | ||
532 | } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start)); | ||
533 | |||
534 | return tmp; | ||
535 | } | ||
536 | |||
537 | /** | ||
538 | * blkg_rwstat_total - read the total count of a blkg_rwstat | ||
539 | * @rwstat: blkg_rwstat to read | ||
540 | * | ||
541 | * Return the total count of @rwstat regardless of the IO direction. This | ||
542 | * function can be called without synchronization and takes care of u64 | ||
543 | * atomicity. | ||
544 | */ | ||
545 | static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) | ||
546 | { | ||
547 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); | ||
548 | |||
549 | return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; | ||
550 | } | ||
551 | |||
552 | /** | ||
553 | * blkg_rwstat_reset - reset a blkg_rwstat | ||
554 | * @rwstat: blkg_rwstat to reset | ||
555 | */ | ||
556 | static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) | ||
557 | { | ||
558 | memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); | ||
559 | } | ||
560 | |||
561 | /** | ||
562 | * blkg_rwstat_merge - merge a blkg_rwstat into another | ||
563 | * @to: the destination blkg_rwstat | ||
564 | * @from: the source | ||
565 | * | ||
566 | * Add @from's counts to @to. | ||
567 | */ | ||
568 | static inline void blkg_rwstat_merge(struct blkg_rwstat *to, | ||
569 | struct blkg_rwstat *from) | ||
570 | { | ||
571 | struct blkg_rwstat v = blkg_rwstat_read(from); | ||
572 | int i; | ||
573 | |||
574 | u64_stats_update_begin(&to->syncp); | ||
575 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | ||
576 | to->cnt[i] += v.cnt[i]; | ||
577 | u64_stats_update_end(&to->syncp); | ||
578 | } | ||
579 | |||
580 | #else /* CONFIG_BLK_CGROUP */ | ||
581 | |||
582 | struct cgroup; | ||
583 | struct blkcg; | ||
584 | |||
585 | struct blkg_policy_data { | ||
586 | }; | ||
587 | |||
588 | struct blkcg_policy_data { | ||
589 | }; | ||
590 | |||
591 | struct blkcg_gq { | ||
592 | }; | ||
593 | |||
594 | struct blkcg_policy { | ||
595 | }; | ||
596 | |||
597 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } | ||
598 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } | ||
599 | static inline void blkcg_drain_queue(struct request_queue *q) { } | ||
600 | static inline void blkcg_exit_queue(struct request_queue *q) { } | ||
601 | static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } | ||
602 | static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } | ||
603 | static inline int blkcg_activate_policy(struct request_queue *q, | ||
604 | const struct blkcg_policy *pol) { return 0; } | ||
605 | static inline void blkcg_deactivate_policy(struct request_queue *q, | ||
606 | const struct blkcg_policy *pol) { } | ||
607 | |||
608 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | ||
609 | |||
610 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | ||
611 | struct blkcg_policy *pol) { return NULL; } | ||
612 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } | ||
613 | static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | ||
614 | static inline void blkg_get(struct blkcg_gq *blkg) { } | ||
615 | static inline void blkg_put(struct blkcg_gq *blkg) { } | ||
616 | |||
617 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
618 | struct bio *bio) { return &q->root_rl; } | ||
619 | static inline void blk_put_rl(struct request_list *rl) { } | ||
620 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | ||
621 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | ||
622 | |||
623 | #define blk_queue_for_each_rl(rl, q) \ | ||
624 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | ||
625 | |||
626 | #endif /* CONFIG_BLK_CGROUP */ | ||
627 | #endif /* _BLK_CGROUP_H */ | ||
diff --git a/block/blk-core.c b/block/blk-core.c
index f6ab750060fe..688ae9482cb8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -32,12 +32,12 @@
32 | #include <linux/delay.h> | 32 | #include <linux/delay.h> |
33 | #include <linux/ratelimit.h> | 33 | #include <linux/ratelimit.h> |
34 | #include <linux/pm_runtime.h> | 34 | #include <linux/pm_runtime.h> |
35 | #include <linux/blk-cgroup.h> | ||
35 | 36 | ||
36 | #define CREATE_TRACE_POINTS | 37 | #define CREATE_TRACE_POINTS |
37 | #include <trace/events/block.h> | 38 | #include <trace/events/block.h> |
38 | 39 | ||
39 | #include "blk.h" | 40 | #include "blk.h" |
40 | #include "blk-cgroup.h" | ||
41 | #include "blk-mq.h" | 41 | #include "blk-mq.h" |
42 | 42 | ||
43 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | 43 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
@@ -63,6 +63,31 @@ struct kmem_cache *blk_requestq_cachep;
63 | */ | 63 | */ |
64 | static struct workqueue_struct *kblockd_workqueue; | 64 | static struct workqueue_struct *kblockd_workqueue; |
65 | 65 | ||
66 | static void blk_clear_congested(struct request_list *rl, int sync) | ||
67 | { | ||
68 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
69 | clear_wb_congested(rl->blkg->wb_congested, sync); | ||
70 | #else | ||
71 | /* | ||
72 | * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't | ||
73 | * flip its congestion state for events on other blkcgs. | ||
74 | */ | ||
75 | if (rl == &rl->q->root_rl) | ||
76 | clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync); | ||
77 | #endif | ||
78 | } | ||
79 | |||
80 | static void blk_set_congested(struct request_list *rl, int sync) | ||
81 | { | ||
82 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
83 | set_wb_congested(rl->blkg->wb_congested, sync); | ||
84 | #else | ||
85 | /* see blk_clear_congested() */ | ||
86 | if (rl == &rl->q->root_rl) | ||
87 | set_wb_congested(rl->q->backing_dev_info.wb.congested, sync); | ||
88 | #endif | ||
89 | } | ||
90 | |||
66 | void blk_queue_congestion_threshold(struct request_queue *q) | 91 | void blk_queue_congestion_threshold(struct request_queue *q) |
67 | { | 92 | { |
68 | int nr; | 93 | int nr; |
@@ -623,8 +648,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
623 | 648 | ||
624 | q->backing_dev_info.ra_pages = | 649 | q->backing_dev_info.ra_pages = |
625 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 650 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
626 | q->backing_dev_info.state = 0; | 651 | q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK; |
627 | q->backing_dev_info.capabilities = 0; | ||
628 | q->backing_dev_info.name = "block"; | 652 | q->backing_dev_info.name = "block"; |
629 | q->node = node_id; | 653 | q->node = node_id; |
630 | 654 | ||
@@ -847,13 +871,8 @@ static void __freed_request(struct request_list *rl, int sync)
847 | { | 871 | { |
848 | struct request_queue *q = rl->q; | 872 | struct request_queue *q = rl->q; |
849 | 873 | ||
850 | /* | 874 | if (rl->count[sync] < queue_congestion_off_threshold(q)) |
851 | * bdi isn't aware of blkcg yet. As all async IOs end up root | 875 | blk_clear_congested(rl, sync); |
852 | * blkcg anyway, just use root blkcg state. | ||
853 | */ | ||
854 | if (rl == &q->root_rl && | ||
855 | rl->count[sync] < queue_congestion_off_threshold(q)) | ||
856 | blk_clear_queue_congested(q, sync); | ||
857 | 876 | ||
858 | if (rl->count[sync] + 1 <= q->nr_requests) { | 877 | if (rl->count[sync] + 1 <= q->nr_requests) { |
859 | if (waitqueue_active(&rl->wait[sync])) | 878 | if (waitqueue_active(&rl->wait[sync])) |
@@ -886,25 +905,25 @@ static void freed_request(struct request_list *rl, unsigned int flags)
886 | int blk_update_nr_requests(struct request_queue *q, unsigned int nr) | 905 | int blk_update_nr_requests(struct request_queue *q, unsigned int nr) |
887 | { | 906 | { |
888 | struct request_list *rl; | 907 | struct request_list *rl; |
908 | int on_thresh, off_thresh; | ||
889 | 909 | ||
890 | spin_lock_irq(q->queue_lock); | 910 | spin_lock_irq(q->queue_lock); |
891 | q->nr_requests = nr; | 911 | q->nr_requests = nr; |
892 | blk_queue_congestion_threshold(q); | 912 | blk_queue_congestion_threshold(q); |
913 | on_thresh = queue_congestion_on_threshold(q); | ||
914 | off_thresh = queue_congestion_off_threshold(q); | ||
893 | 915 | ||
894 | /* congestion isn't cgroup aware and follows root blkcg for now */ | 916 | blk_queue_for_each_rl(rl, q) { |
895 | rl = &q->root_rl; | 917 | if (rl->count[BLK_RW_SYNC] >= on_thresh) |
896 | 918 | blk_set_congested(rl, BLK_RW_SYNC); | |
897 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) | 919 | else if (rl->count[BLK_RW_SYNC] < off_thresh) |
898 | blk_set_queue_congested(q, BLK_RW_SYNC); | 920 | blk_clear_congested(rl, BLK_RW_SYNC); |
899 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) | ||
900 | blk_clear_queue_congested(q, BLK_RW_SYNC); | ||
901 | 921 | ||
902 | if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) | 922 | if (rl->count[BLK_RW_ASYNC] >= on_thresh) |
903 | blk_set_queue_congested(q, BLK_RW_ASYNC); | 923 | blk_set_congested(rl, BLK_RW_ASYNC); |
904 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) | 924 | else if (rl->count[BLK_RW_ASYNC] < off_thresh) |
905 | blk_clear_queue_congested(q, BLK_RW_ASYNC); | 925 | blk_clear_congested(rl, BLK_RW_ASYNC); |
906 | 926 | ||
907 | blk_queue_for_each_rl(rl, q) { | ||
908 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | 927 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { |
909 | blk_set_rl_full(rl, BLK_RW_SYNC); | 928 | blk_set_rl_full(rl, BLK_RW_SYNC); |
910 | } else { | 929 | } else { |
@@ -1014,12 +1033,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1014 | } | 1033 | } |
1015 | } | 1034 | } |
1016 | } | 1035 | } |
1017 | /* | 1036 | blk_set_congested(rl, is_sync); |
1018 | * bdi isn't aware of blkcg yet. As all async IOs end up | ||
1019 | * root blkcg anyway, just use root blkcg state. | ||
1020 | */ | ||
1021 | if (rl == &q->root_rl) | ||
1022 | blk_set_queue_congested(q, is_sync); | ||
1023 | } | 1037 | } |
1024 | 1038 | ||
1025 | /* | 1039 | /* |
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 79ffb4855af0..f548b64be092 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -21,6 +21,7 @@
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <linux/backing-dev.h> | ||
24 | #include <linux/mempool.h> | 25 | #include <linux/mempool.h> |
25 | #include <linux/bio.h> | 26 | #include <linux/bio.h> |
26 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 2b8fd302f677..6264b382d4d1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -6,11 +6,12 @@
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/bio.h> | 7 | #include <linux/bio.h> |
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/backing-dev.h> | ||
9 | #include <linux/blktrace_api.h> | 10 | #include <linux/blktrace_api.h> |
10 | #include <linux/blk-mq.h> | 11 | #include <linux/blk-mq.h> |
12 | #include <linux/blk-cgroup.h> | ||
11 | 13 | ||
12 | #include "blk.h" | 14 | #include "blk.h" |
13 | #include "blk-cgroup.h" | ||
14 | #include "blk-mq.h" | 15 | #include "blk-mq.h" |
15 | 16 | ||
16 | struct queue_sysfs_entry { | 17 | struct queue_sysfs_entry { |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5b9c6d5c3636..b23193518ac7 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -9,7 +9,7 @@
9 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
10 | #include <linux/bio.h> | 10 | #include <linux/bio.h> |
11 | #include <linux/blktrace_api.h> | 11 | #include <linux/blktrace_api.h> |
12 | #include "blk-cgroup.h" | 12 | #include <linux/blk-cgroup.h> |
13 | #include "blk.h" | 13 | #include "blk.h" |
14 | 14 | ||
15 | /* Max dispatch from a group in 1 round */ | 15 | /* Max dispatch from a group in 1 round */ |
diff --git a/block/bounce.c b/block/bounce.c
index 3ab0bce1c947..b17311227c12 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -13,6 +13,7 @@
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/mempool.h> | 14 | #include <linux/mempool.h> |
15 | #include <linux/blkdev.h> | 15 | #include <linux/blkdev.h> |
16 | #include <linux/backing-dev.h> | ||
16 | #include <linux/init.h> | 17 | #include <linux/init.h> |
17 | #include <linux/hash.h> | 18 | #include <linux/hash.h> |
18 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d8ad45ccd8fa..c62bb2e650b8 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -14,8 +14,8 @@
14 | #include <linux/rbtree.h> | 14 | #include <linux/rbtree.h> |
15 | #include <linux/ioprio.h> | 15 | #include <linux/ioprio.h> |
16 | #include <linux/blktrace_api.h> | 16 | #include <linux/blktrace_api.h> |
17 | #include <linux/blk-cgroup.h> | ||
17 | #include "blk.h" | 18 | #include "blk.h" |
18 | #include "blk-cgroup.h" | ||
19 | 19 | ||
20 | /* | 20 | /* |
21 | * tunables | 21 | * tunables |
diff --git a/block/elevator.c b/block/elevator.c
index 942579d04128..84d63943f2de 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -35,11 +35,11 @@
35 | #include <linux/hash.h> | 35 | #include <linux/hash.h> |
36 | #include <linux/uaccess.h> | 36 | #include <linux/uaccess.h> |
37 | #include <linux/pm_runtime.h> | 37 | #include <linux/pm_runtime.h> |
38 | #include <linux/blk-cgroup.h> | ||
38 | 39 | ||
39 | #include <trace/events/block.h> | 40 | #include <trace/events/block.h> |
40 | 41 | ||
41 | #include "blk.h" | 42 | #include "blk.h" |
42 | #include "blk-cgroup.h" | ||
43 | 43 | ||
44 | static DEFINE_SPINLOCK(elv_list_lock); | 44 | static DEFINE_SPINLOCK(elv_list_lock); |
45 | static LIST_HEAD(elv_list); | 45 | static LIST_HEAD(elv_list); |
diff --git a/block/genhd.c b/block/genhd.c
index ea982eadaf63..59a1395eedac 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -8,6 +8,7 @@
8 | #include <linux/kdev_t.h> | 8 | #include <linux/kdev_t.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/blkdev.h> | 10 | #include <linux/blkdev.h> |
11 | #include <linux/backing-dev.h> | ||
11 | #include <linux/init.h> | 12 | #include <linux/init.h> |
12 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
13 | #include <linux/proc_fs.h> | 14 | #include <linux/proc_fs.h> |