author	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 16:03:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 16:03:24 -0400
commit	36805aaea5ae3cf1bb32f1643e0a800bb69f0d5b (patch)
tree	5565132549a0733772b3a2ac6b5cda516ea8cdce /block/blk-cgroup.c
parent	6d2fa9e141ea56a571ec842fd4f3a86bea44a203 (diff)
parent	d50235b7bc3ee0a0427984d763ea7534149531b4 (diff)
Merge branch 'for-3.11/core' of git://git.kernel.dk/linux-block
Pull core block IO updates from Jens Axboe:
 "Here are the core IO block bits for 3.11.  It contains:

   - A tweak to the reserved tag logic from Jan, for weirdo devices
     with just 3 free tags.  But for those it improves things
     substantially for random writes.

   - Periodic writeback fix from Jan.  Marked for stable as well.

   - Fix for a race condition in IO scheduler switching from Jianpeng.

   - The hierarchical blk-cgroup support from Tejun.  This is the grunt
     of the series.

   - blk-throttle fix from Vivek.

  Just a note that I'm in the middle of a relocation, whole family is
  flying out tomorrow.  Hence I will be AWOL the remainder of this
  week, but back at work again on Monday the 15th.

  CC'ing Tejun, since any potential "surprises" will most likely be
  from the blk-cgroup work.  But it's been brewing for a while and
  sitting in my tree and linux-next for a long time, so should be
  solid."

* 'for-3.11/core' of git://git.kernel.dk/linux-block: (36 commits)
  elevator: Fix a race in elevator switching
  block: Reserve only one queue tag for sync IO if only 3 tags are available
  writeback: Fix periodic writeback after fs mount
  blk-throttle: implement proper hierarchy support
  blk-throttle: implement throtl_grp->has_rules[]
  blk-throttle: Account for child group's start time in parent while bio climbs up
  blk-throttle: add throtl_qnode for dispatch fairness
  blk-throttle: make throtl_pending_timer_fn() ready for hierarchy
  blk-throttle: make tg_dispatch_one_bio() ready for hierarchy
  blk-throttle: make blk_throtl_bio() ready for hierarchy
  blk-throttle: make blk_throtl_drain() ready for hierarchy
  blk-throttle: dispatch from throtl_pending_timer_fn()
  blk-throttle: implement dispatch looping
  blk-throttle: separate out throtl_service_queue->pending_timer from throtl_data->dispatch_work
  blk-throttle: set REQ_THROTTLED from throtl_charge_bio() and gate stats update with it
  blk-throttle: implement sq_to_tg(), sq_to_td() and throtl_log()
  blk-throttle: add throtl_service_queue->parent_sq
  blk-throttle: generalize update_disptime optimization in blk_throtl_bio()
  blk-throttle: dispatch to throtl_data->service_queue.bio_lists[]
  blk-throttle: move bio_lists[] and friends to throtl_service_queue
  ...
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--  block/blk-cgroup.c  105
1 file changed, 40 insertions(+), 65 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e8918ffaf96d..290792a13e3c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-                                      struct request_queue *q, bool update_hint);
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
- * read locked.  If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)         \
-        cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-                if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
-                                              (p_blkg)->q, false)))
-
 static bool blkcg_policy_enabled(struct request_queue *q,
                                  const struct blkcg_policy *pol)
 {
@@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg)
         if (!blkg)
                 return;
 
-        for (i = 0; i < BLKCG_MAX_POLS; i++) {
-                struct blkcg_policy *pol = blkcg_policy[i];
-                struct blkg_policy_data *pd = blkg->pd[i];
-
-                if (!pd)
-                        continue;
-
-                if (pol && pol->pd_exit_fn)
-                        pol->pd_exit_fn(blkg);
-
-                kfree(pd);
-        }
+        for (i = 0; i < BLKCG_MAX_POLS; i++)
+                kfree(blkg->pd[i]);
 
         blk_exit_rl(&blkg->rl);
         kfree(blkg);
@@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
                 blkg->pd[i] = pd;
                 pd->blkg = blkg;
                 pd->plid = i;
-
-                /* invoke per-policy init */
-                if (pol->pd_init_fn)
-                        pol->pd_init_fn(blkg);
         }
 
         return blkg;
@@ -158,8 +124,8 @@ err_free:
  * @q's bypass state.  If @update_hint is %true, the caller should be
  * holding @q->queue_lock and lookup hint is updated on success.
  */
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-                                      struct request_queue *q, bool update_hint)
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+                               bool update_hint)
 {
         struct blkcg_gq *blkg;
 
@@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
         }
         blkg = new_blkg;
 
-        /* link parent and insert */
+        /* link parent */
         if (blkcg_parent(blkcg)) {
                 blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
                 if (WARN_ON_ONCE(!blkg->parent)) {
-                        blkg = ERR_PTR(-EINVAL);
+                        ret = -EINVAL;
                         goto err_put_css;
                 }
                 blkg_get(blkg->parent);
         }
 
+        /* invoke per-policy init */
+        for (i = 0; i < BLKCG_MAX_POLS; i++) {
+                struct blkcg_policy *pol = blkcg_policy[i];
+
+                if (blkg->pd[i] && pol->pd_init_fn)
+                        pol->pd_init_fn(blkg);
+        }
+
+        /* insert */
         spin_lock(&blkcg->lock);
         ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
         if (likely(!ret)) {
@@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q)
         q->root_rl.blkg = NULL;
 }
 
-static void blkg_rcu_free(struct rcu_head *rcu_head)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
-        blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
+        struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+        int i;
+
+        /* tell policies that this one is being freed */
+        for (i = 0; i < BLKCG_MAX_POLS; i++) {
+                struct blkcg_policy *pol = blkcg_policy[i];
+
+                if (blkg->pd[i] && pol->pd_exit_fn)
+                        pol->pd_exit_fn(blkg);
+        }
 
-void __blkg_release(struct blkcg_gq *blkg)
-{
         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
-        if (blkg->parent)
+        if (blkg->parent) {
+                spin_lock_irq(blkg->q->queue_lock);
                 blkg_put(blkg->parent);
+                spin_unlock_irq(blkg->q->queue_lock);
+        }
 
-        /*
-         * A group is freed in rcu manner. But having an rcu lock does not
-         * mean that one can access all the fields of blkg and assume these
-         * are valid. For example, don't try to follow throtl_data and
-         * request queue links.
-         *
-         * Having a reference to blkg under an rcu allows acess to only
-         * values local to groups like group stats and group rate limits
-         */
-        call_rcu(&blkg->rcu_head, blkg_rcu_free);
+        blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
 
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
@@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = {
         .subsys_id = blkio_subsys_id,
         .base_cftypes = blkcg_files,
         .module = THIS_MODULE,
-
-        /*
-         * blkio subsystem is utterly broken in terms of hierarchy support.
-         * It treats all cgroups equally regardless of where they're
-         * located in the hierarchy - all cgroups are treated as if they're
-         * right below the root.  Fix it and remove the following.
-         */
-        .broken_hierarchy = true,
 };
 EXPORT_SYMBOL_GPL(blkio_subsys);
 
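
Editor's note, not part of the patch: the functional core of this diff is a reordering of the blkg lifecycle. Per-policy pd_init_fn() now runs in blkg_create() only after the parent blkg has been linked, and pd_exit_fn() has moved out of blkg_free() into the RCU release callback __blkg_release_rcu(), which runs before the group's policy data is freed. The userspace sketch below is a minimal illustration of that ordering only: every toy_* name is invented here, allocation and creation are collapsed into one step, and none of it is kernel code.

/* Illustrative sketch only -- NOT kernel code. */
#include <stdio.h>
#include <stdlib.h>

#define TOY_MAX_POLS 2

struct toy_blkg;

struct toy_policy {
        const char *name;
        void (*pd_init_fn)(struct toy_blkg *blkg);
        void (*pd_exit_fn)(struct toy_blkg *blkg);
};

struct toy_blkg {
        struct toy_blkg *parent;
        void *pd[TOY_MAX_POLS];         /* per-policy data, may be NULL */
};

static struct toy_policy *toy_policies[TOY_MAX_POLS];

static void throtl_init(struct toy_blkg *blkg)
{
        (void)blkg;
        puts("  pd_init_fn: parent is already linked at this point");
}

static void throtl_exit(struct toy_blkg *blkg)
{
        (void)blkg;
        puts("  pd_exit_fn: called in the release path, before freeing");
}

static struct toy_policy throtl_pol = { "throtl", throtl_init, throtl_exit };

/* Models the new blkg_create() order: link parent first, then init policies. */
static struct toy_blkg *toy_blkg_create(struct toy_blkg *parent)
{
        struct toy_blkg *blkg = calloc(1, sizeof(*blkg));
        int i;

        if (!blkg)
                return NULL;

        blkg->parent = parent;                  /* link parent */
        for (i = 0; i < TOY_MAX_POLS; i++) {    /* then per-policy init */
                struct toy_policy *pol = toy_policies[i];

                blkg->pd[i] = pol ? malloc(16) : NULL;
                if (blkg->pd[i] && pol->pd_init_fn)
                        pol->pd_init_fn(blkg);
        }
        return blkg;
}

/* Models __blkg_release_rcu(): tell policies first, then free everything. */
static void toy_blkg_release(struct toy_blkg *blkg)
{
        int i;

        for (i = 0; i < TOY_MAX_POLS; i++) {
                struct toy_policy *pol = toy_policies[i];

                if (blkg->pd[i] && pol->pd_exit_fn)
                        pol->pd_exit_fn(blkg);
                free(blkg->pd[i]);
        }
        free(blkg);
}

int main(void)
{
        struct toy_blkg *root, *child;

        toy_policies[0] = &throtl_pol;

        puts("create root:");
        root = toy_blkg_create(NULL);
        puts("create child:");
        child = toy_blkg_create(root);

        puts("release child:");
        toy_blkg_release(child);
        puts("release root:");
        toy_blkg_release(root);
        return 0;
}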