aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> 2010-10-19 03:05:00 -0400
committer Jens Axboe <jaxboe@fusionio.com> 2010-10-19 03:07:02 -0400
commit 7681bfeeccff5efa9eb29bf09249a3c400b15327 (patch)
tree 8557964a2df96e253dcf1a61734b98dbfbf192d6 /fs
parent 495d2b3883682fcd1c3dee3a45e38fd00154ae25 (diff)
block: fix accounting bug on cross partition merges
/proc/diskstats would display strange output such as the following:

$ cat /proc/diskstats | grep sda
   8 0 sda 90524 7579 102154 20464 0 0 0 0 0 14096 20089
   8 1 sda1 19085 1352 21841 4209 0 0 0 0 4294967064 15689 4293424691
                                          ~~~~~~~~~~
   8 2 sda2 71252 3624 74891 15950 0 0 0 0 232 23995 1562390
   8 3 sda3 54 487 2188 92 0 0 0 0 0 88 92
   8 4 sda4 4 0 8 0 0 0 0 0 0 0 0
   8 5 sda5 81 2027 2130 138 0 0 0 0 0 87 137

The reason is that hd_struct->in_flight is accounted incorrectly when a bio is
merged, by ELEVATOR_FRONT_MERGE, into a request that belongs to a different
partition.

The detailed root cause is as follows.

Assume that there are two partitions, sda1 and sda2.

1. A request for sda2 is in the request_queue. Hence sda1's
   hd_struct->in_flight is 0 and sda2's is 1.

        | hd_struct->in_flight
   ---------------------------
   sda1 |          0
   sda2 |          1
   ---------------------------

2. A bio that belongs to sda1 is issued and is merged into the request
   mentioned in step 1 by ELEVATOR_FRONT_MERGE. The first sector of the
   request changes from the sda2 region to the sda1 region. However, neither
   partition's hd_struct->in_flight is changed.

        | hd_struct->in_flight
   ---------------------------
   sda1 |          0
   sda2 |          1
   ---------------------------

3. The request is finished and blk_account_io_done() is called. In this case,
   sda1's hd_struct->in_flight, not sda2's, is decremented.

        | hd_struct->in_flight
   ---------------------------
   sda1 |         -1
   sda2 |          1
   ---------------------------

The patch fixes the problem by caching the partition lookup inside the request
structure, hence making sure that the increment and decrement will always
happen on the same partition struct. This also speeds up IO with accounting
enabled, since it cuts down on the number of lookups we have to do.

When reloading partition tables, quiesce IO to ensure that no request
references to the partition struct exist. When it is safe to free the
partition table, the IO for that device is restarted again.
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/partitions/check.c 12
1 files changed, 12 insertions, 0 deletions
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6dfbee03ccc6..30f46c2cb9d5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -365,17 +365,25 @@ struct device_type part_type = {
365static void delete_partition_rcu_cb(struct rcu_head *head) 365static void delete_partition_rcu_cb(struct rcu_head *head)
366{ 366{
367 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); 367 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
368 struct gendisk *disk = part_to_disk(part);
369 struct request_queue *q = disk->queue;
370 unsigned long flags;
368 371
369 part->start_sect = 0; 372 part->start_sect = 0;
370 part->nr_sects = 0; 373 part->nr_sects = 0;
371 part_stat_set_all(part, 0); 374 part_stat_set_all(part, 0);
372 put_device(part_to_dev(part)); 375 put_device(part_to_dev(part));
376
377 spin_lock_irqsave(q->queue_lock, flags);
378 elv_quiesce_end(q);
379 spin_unlock_irqrestore(q->queue_lock, flags);
373} 380}
374 381
375void delete_partition(struct gendisk *disk, int partno) 382void delete_partition(struct gendisk *disk, int partno)
376{ 383{
377 struct disk_part_tbl *ptbl = disk->part_tbl; 384 struct disk_part_tbl *ptbl = disk->part_tbl;
378 struct hd_struct *part; 385 struct hd_struct *part;
386 struct request_queue *q = disk->queue;
379 387
380 if (partno >= ptbl->len) 388 if (partno >= ptbl->len)
381 return; 389 return;
@@ -390,6 +398,10 @@ void delete_partition(struct gendisk *disk, int partno)
390 kobject_put(part->holder_dir); 398 kobject_put(part->holder_dir);
391 device_del(part_to_dev(part)); 399 device_del(part_to_dev(part));
392 400
401 spin_lock_irq(q->queue_lock);
402 elv_quiesce_start(q);
403 spin_unlock_irq(q->queue_lock);
404
393 call_rcu(&part->rcu_head, delete_partition_rcu_cb); 405 call_rcu(&part->rcu_head, delete_partition_rcu_cb);
394} 406}
395 407