author    Linus Torvalds <torvalds@linux-foundation.org>  2014-10-10 07:26:02 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-10-10 07:26:02 -0400
commit    c798360cd1438090d51eeaa8e67985da11362eba
tree      0107d3b9ee7476264c3357287787d393545bd2d9
parent    b211e9d7c861bdb37b86d6384da9edfb80949ceb
parent    6ae833c7fe0c6ef1f0ab13cc775da230d6f4c256
Merge branch 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull percpu updates from Tejun Heo:
 "A lot of activities on percpu front. Notable changes are...

  - percpu allocator now can take @gfp. If @gfp doesn't contain
    GFP_KERNEL, it tries to allocate from what's already available to
    the allocator and a work item tries to keep the reserve around
    certain level so that these atomic allocations usually succeed.

    This will replace the ad-hoc percpu memory pool used by
    blk-throttle and also be used by the planned blkcg support for
    writeback IOs.

    Please note that I noticed a bug in how @gfp is interpreted while
    preparing this pull request and applied the fix 6ae833c7fe0c
    ("percpu: fix how @gfp is interpreted by the percpu allocator")
    just now.

  - percpu_ref now uses longs for percpu and global counters instead
    of ints. It leads to more sparse packing of the percpu counters
    on 64bit machines but the overhead should be negligible and this
    allows using percpu_ref for refcnting pages and in-memory objects
    directly.

  - The switching between percpu and single counter modes of a
    percpu_ref is made independent of putting the base ref and a
    percpu_ref can now optionally be initialized in single or killed
    mode. This allows avoiding percpu shutdown latency for cases
    where the refcounted objects may be synchronously created and
    destroyed in rapid succession with only a fraction of them
    reaching fully operational status (SCSI probing does this when
    combined with blk-mq support). It's also planned to be used to
    implement forced single mode to detect underflow more timely for
    debugging.

 There's a separate branch percpu/for-3.18-consistent-ops which
 cleans up the duplicate percpu accessors. That branch causes a
 number of conflicts with s390 and other trees. I'll send a separate
 pull request w/ resolutions once other branches are merged"

* 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (33 commits)
  percpu: fix how @gfp is interpreted by the percpu allocator
  blk-mq, percpu_ref: start q->mq_usage_counter in atomic mode
  percpu_ref: make INIT_ATOMIC and switch_to_atomic() sticky
  percpu_ref: add PERCPU_REF_INIT_* flags
  percpu_ref: decouple switching to percpu mode and reinit
  percpu_ref: decouple switching to atomic mode and killing
  percpu_ref: add PCPU_REF_DEAD
  percpu_ref: rename things to prepare for decoupling percpu/atomic mode switch
  percpu_ref: replace pcpu_ prefix with percpu_
  percpu_ref: minor code and comment updates
  percpu_ref: relocate percpu_ref_reinit()
  Revert "blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe"
  Revert "percpu: free percpu allocation info for uniprocessor system"
  percpu-refcount: make percpu_ref based on longs instead of ints
  percpu-refcount: improve WARN messages
  percpu: fix locking regression in the failure path of pcpu_alloc()
  percpu-refcount: add @gfp to percpu_ref_init()
  proportions: add @gfp to init functions
  percpu_counter: add @gfp to percpu_counter_init()
  percpu_counter: make percpu_counters_lock irq-safe
  ...
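To make the API changes above concrete, here is a minimal sketch of the new
percpu_ref calling convention, assuming only the signatures visible in the
diff below (percpu_ref_init() taking @flags and @gfp, plus
percpu_ref_switch_to_percpu()); the struct and helper names are hypothetical:

	#include <linux/kernel.h>
	#include <linux/percpu-refcount.h>
	#include <linux/slab.h>

	/* Hypothetical refcounted object, for illustration only. */
	struct my_obj {
		struct percpu_ref ref;
	};

	static void my_obj_release(struct percpu_ref *ref)
	{
		/* Invoked once the last reference has been dropped. */
		kfree(container_of(ref, struct my_obj, ref));
	}

	static int my_obj_init(struct my_obj *obj)
	{
		/*
		 * Start in atomic (single-counter) mode: an atomic-mode
		 * ref is cheap to kill, which matters when the object may
		 * be torn down right after creation, as in the SCSI probe
		 * case described above.
		 */
		return percpu_ref_init(&obj->ref, my_obj_release,
				       PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
	}

	static void my_obj_go_live(struct my_obj *obj)
	{
		/*
		 * The object survived setup; switch to percpu mode so
		 * that get/put become cheap per-cpu operations.
		 */
		percpu_ref_switch_to_percpu(&obj->ref);
	}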
Diffstat (limited to 'block')
-rw-r--r--  block/blk-mq-sysfs.c   6
-rw-r--r--  block/blk-mq.c        18
-rw-r--r--  block/blk-sysfs.c     11
3 files changed, 22 insertions, 13 deletions
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ed5217867555..371d8800b48a 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -402,6 +402,12 @@ static void blk_mq_sysfs_init(struct request_queue *q)
 	}
 }
 
+/* see blk_register_queue() */
+void blk_mq_finish_init(struct request_queue *q)
+{
+	percpu_ref_switch_to_percpu(&q->mq_usage_counter);
+}
+
 int blk_mq_register_disk(struct gendisk *disk)
 {
 	struct device *dev = disk_to_dev(disk);
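Note: blk_mq_finish_init() is called from blk-sysfs.c further down, so a
matching declaration has to land in a header shared by both files; that hunk
falls outside this view, which is limited to 'block'. Presumably it amounts
to no more than the following (the header location is an assumption, not
shown in this diff):

	/* assumed declaration; the header hunk is filtered out of this view */
	void blk_mq_finish_init(struct request_queue *q);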
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df8e1e09dd17..38f4a165640d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -119,16 +119,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 
 	if (freeze) {
-		/*
-		 * XXX: Temporary kludge to work around SCSI blk-mq stall.
-		 * SCSI synchronously creates and destroys many queues
-		 * back-to-back during probe leading to lengthy stalls.
-		 * This will be fixed by keeping ->mq_usage_counter in
-		 * atomic mode until genhd registration, but, for now,
-		 * let's work around using expedited synchronization.
-		 */
-		__percpu_ref_kill_expedited(&q->mq_usage_counter);
-
+		percpu_ref_kill(&q->mq_usage_counter);
 		blk_mq_run_queues(q, false);
 	}
 	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
@@ -1804,7 +1795,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!q)
 		goto err_hctxs;
 
-	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
+	/*
+	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
+	 * See blk_register_queue() for details.
+	 */
+	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
+			    PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
 		goto err_map;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 17f5c84ce7bf..521ae9089c50 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -551,12 +551,19 @@ int blk_register_queue(struct gendisk *disk)
 		return -ENXIO;
 
 	/*
-	 * Initialization must be complete by now. Finish the initial
-	 * bypass from queue allocation.
+	 * SCSI probing may synchronously create and destroy a lot of
+	 * request_queues for non-existent devices. Shutting down a fully
+	 * functional queue takes measureable wallclock time as RCU grace
+	 * periods are involved. To avoid excessive latency in these
+	 * cases, a request_queue starts out in a degraded mode which is
+	 * faster to shut down and is made fully functional here as
+	 * request_queues for non-existent devices never get registered.
 	 */
 	if (!blk_queue_init_done(q)) {
 		queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
 		blk_queue_bypass_end(q);
+		if (q->mq_ops)
+			blk_mq_finish_init(q);
 	}
 
 	ret = blk_trace_init_sysfs(dev);
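Taken together, the three hunks give an mq request_queue the following life
cycle. This is an illustrative summary drawn only from the code above
(drivers normally reach blk_register_queue() indirectly, e.g. via
add_disk()), not a verbatim call path:

	/*
	 * blk_mq_init_queue()   - mq_usage_counter starts in atomic mode
	 *                         (PERCPU_REF_INIT_ATOMIC); a queue that is
	 *                         destroyed before registration shuts down
	 *                         without waiting out RCU grace periods.
	 * blk_register_queue()  - blk_mq_finish_init() switches the counter
	 *                         to percpu mode; queues for non-existent
	 *                         devices never reach this point.
	 * blk_mq_freeze_queue() - a plain percpu_ref_kill() now suffices,
	 *                         replacing the expedited-kill workaround.
	 */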