Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/as-iosched.c        |   8
-rw-r--r--  drivers/block/deadline-iosched.c  |   8
-rw-r--r--  drivers/block/genhd.c             |  25
-rw-r--r--  drivers/block/ioctl.c             |  74
-rw-r--r--  drivers/block/ll_rw_blk.c         | 111
-rw-r--r--  drivers/block/loop.c              |  81
-rw-r--r--  drivers/block/pktcdvd.c           |  36
7 files changed, 199 insertions(+), 144 deletions(-)
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 638db06de2be..3410b4d294b9 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -1871,20 +1871,22 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	if (!arq_pool)
 		return -ENOMEM;
 
-	ad = kmalloc(sizeof(*ad), GFP_KERNEL);
+	ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node);
 	if (!ad)
 		return -ENOMEM;
 	memset(ad, 0, sizeof(*ad));
 
 	ad->q = q; /* Identify what queue the data belongs to */
 
-	ad->hash = kmalloc(sizeof(struct list_head)*AS_HASH_ENTRIES,GFP_KERNEL);
+	ad->hash = kmalloc_node(sizeof(struct list_head)*AS_HASH_ENTRIES,
+				GFP_KERNEL, q->node);
 	if (!ad->hash) {
 		kfree(ad);
 		return -ENOMEM;
 	}
 
-	ad->arq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, arq_pool);
+	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
+					   mempool_free_slab, arq_pool, q->node);
 	if (!ad->arq_pool) {
 		kfree(ad->hash);
 		kfree(ad);
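
The change above (mirrored in deadline-iosched.c below) swaps kmalloc()/mempool_create() for their _node variants so the elevator's per-queue data lands on the NUMA node recorded in q->node, which this series sets in blk_init_queue_node(). A minimal sketch of the pattern, assuming invented example_data/example_cachep names; only kmalloc_node(), mempool_create_node() and the mempool slab helpers are real interfaces here:

	#include <linux/blkdev.h>
	#include <linux/mempool.h>
	#include <linux/slab.h>

	struct example_data {			/* hypothetical private data */
		mempool_t *pool;
	};

	static kmem_cache_t *example_cachep;	/* hypothetical slab cache */

	static int example_init_queue(request_queue_t *q, struct example_data **edp)
	{
		struct example_data *ed;

		/* put the per-queue data on the queue's home node */
		ed = kmalloc_node(sizeof(*ed), GFP_KERNEL, q->node);
		if (!ed)
			return -ENOMEM;
		memset(ed, 0, sizeof(*ed));

		/* and draw the mempool's objects from the same node */
		ed->pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
					       mempool_free_slab, example_cachep,
					       q->node);
		if (!ed->pool) {
			kfree(ed);
			return -ENOMEM;
		}

		*edp = ed;
		return 0;
	}

Passing -1 as the node id (as the unchanged entry points below do) means "no node preference" and preserves the old behaviour.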
diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c
index 7f79f3dd0165..4bc2fea73273 100644
--- a/drivers/block/deadline-iosched.c
+++ b/drivers/block/deadline-iosched.c
@@ -711,18 +711,20 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	if (!drq_pool)
 		return -ENOMEM;
 
-	dd = kmalloc(sizeof(*dd), GFP_KERNEL);
+	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
 	if (!dd)
 		return -ENOMEM;
 	memset(dd, 0, sizeof(*dd));
 
-	dd->hash = kmalloc(sizeof(struct list_head)*DL_HASH_ENTRIES,GFP_KERNEL);
+	dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES,
+				GFP_KERNEL, q->node);
 	if (!dd->hash) {
 		kfree(dd);
 		return -ENOMEM;
 	}
 
-	dd->drq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, drq_pool);
+	dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
+					   mempool_free_slab, drq_pool, q->node);
 	if (!dd->drq_pool) {
 		kfree(dd->hash);
 		kfree(dd);
diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c
index 53f7d846b747..47fd3659a061 100644
--- a/drivers/block/genhd.c
+++ b/drivers/block/genhd.c
@@ -40,7 +40,7 @@ static inline int major_to_index(int major)
 
 #ifdef CONFIG_PROC_FS
 /* get block device names in somewhat random order */
-int get_blkdev_list(char *p)
+int get_blkdev_list(char *p, int used)
 {
 	struct blk_major_name *n;
 	int i, len;
@@ -49,10 +49,18 @@ int get_blkdev_list(char *p)
 
 	down(&block_subsys_sem);
 	for (i = 0; i < ARRAY_SIZE(major_names); i++) {
-		for (n = major_names[i]; n; n = n->next)
+		for (n = major_names[i]; n; n = n->next) {
+			/*
+			 * If the current string plus the 5 extra characters
+			 * in the line would run us off the page, then we're done
+			 */
+			if ((len + used + strlen(n->name) + 5) >= PAGE_SIZE)
+				goto page_full;
 			len += sprintf(p+len, "%3d %s\n",
 				       n->major, n->name);
+		}
 	}
+page_full:
 	up(&block_subsys_sem);
 
 	return len;
@@ -582,10 +590,16 @@ struct seq_operations diskstats_op = {
 	.show	= diskstats_show
 };
 
-
 struct gendisk *alloc_disk(int minors)
 {
-	struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
+	return alloc_disk_node(minors, -1);
+}
+
+struct gendisk *alloc_disk_node(int minors, int node_id)
+{
+	struct gendisk *disk;
+
+	disk = kmalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
 	if (disk) {
 		memset(disk, 0, sizeof(struct gendisk));
 		if (!init_disk_stats(disk)) {
@@ -594,7 +608,7 @@ struct gendisk *alloc_disk(int minors)
 	}
 	if (minors > 1) {
 		int size = (minors - 1) * sizeof(struct hd_struct *);
-		disk->part = kmalloc(size, GFP_KERNEL);
+		disk->part = kmalloc_node(size, GFP_KERNEL, node_id);
 		if (!disk->part) {
 			kfree(disk);
 			return NULL;
@@ -610,6 +624,7 @@ struct gendisk *alloc_disk(int minors)
 }
 
 EXPORT_SYMBOL(alloc_disk);
+EXPORT_SYMBOL(alloc_disk_node);
 
 struct kobject *get_disk(struct gendisk *disk)
 {
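
alloc_disk() now simply forwards to alloc_disk_node(minors, -1), so existing callers are unchanged, while a driver that knows its device's home node can keep the gendisk and its partition array node-local. A hedged sketch of a caller; my_probe() and dev_node are invented:

	static int my_probe(int dev_node)
	{
		struct gendisk *disk;

		/* 1 whole-disk minor + 15 partition minors, allocated on dev_node */
		disk = alloc_disk_node(16, dev_node);
		if (!disk)
			return -ENOMEM;

		/* ... fill in major, first_minor, fops, then add_disk() ... */
		return 0;
	}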
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
index 6d7bcc9da9e7..6e278474f9a8 100644
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -133,11 +133,9 @@ static int put_u64(unsigned long arg, u64 val)
 	return put_user(val, (u64 __user *)arg);
 }
 
-int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
-			unsigned long arg)
+static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev,
+				unsigned cmd, unsigned long arg)
 {
-	struct block_device *bdev = inode->i_bdev;
-	struct gendisk *disk = bdev->bd_disk;
 	struct backing_dev_info *bdi;
 	int ret, n;
 
@@ -190,36 +188,72 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
 		return put_ulong(arg, bdev->bd_inode->i_size >> 9);
 	case BLKGETSIZE64:
 		return put_u64(arg, bdev->bd_inode->i_size);
+	}
+	return -ENOIOCTLCMD;
+}
+
+static int blkdev_driver_ioctl(struct inode *inode, struct file *file,
+		struct gendisk *disk, unsigned cmd, unsigned long arg)
+{
+	int ret;
+	if (disk->fops->unlocked_ioctl)
+		return disk->fops->unlocked_ioctl(file, cmd, arg);
+
+	if (disk->fops->ioctl) {
+		lock_kernel();
+		ret = disk->fops->ioctl(inode, file, cmd, arg);
+		unlock_kernel();
+		return ret;
+	}
+
+	return -ENOTTY;
+}
+
+int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
+			unsigned long arg)
+{
+	struct block_device *bdev = inode->i_bdev;
+	struct gendisk *disk = bdev->bd_disk;
+	int ret, n;
+
+	switch(cmd) {
 	case BLKFLSBUF:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
-		if (disk->fops->ioctl) {
-			ret = disk->fops->ioctl(inode, file, cmd, arg);
-			/* -EINVAL to handle old uncorrected drivers */
-			if (ret != -EINVAL && ret != -ENOTTY)
-				return ret;
-		}
+
+		ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg);
+		/* -EINVAL to handle old uncorrected drivers */
+		if (ret != -EINVAL && ret != -ENOTTY)
+			return ret;
+
+		lock_kernel();
 		fsync_bdev(bdev);
 		invalidate_bdev(bdev, 0);
+		unlock_kernel();
 		return 0;
+
 	case BLKROSET:
-		if (disk->fops->ioctl) {
-			ret = disk->fops->ioctl(inode, file, cmd, arg);
-			/* -EINVAL to handle old uncorrected drivers */
-			if (ret != -EINVAL && ret != -ENOTTY)
-				return ret;
-		}
+		ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg);
+		/* -EINVAL to handle old uncorrected drivers */
+		if (ret != -EINVAL && ret != -ENOTTY)
+			return ret;
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 		if (get_user(n, (int __user *)(arg)))
 			return -EFAULT;
+		lock_kernel();
 		set_device_ro(bdev, n);
+		unlock_kernel();
 		return 0;
-	default:
-		if (disk->fops->ioctl)
-			return disk->fops->ioctl(inode, file, cmd, arg);
 	}
-	return -ENOTTY;
+
+	lock_kernel();
+	ret = blkdev_locked_ioctl(file, bdev, cmd, arg);
+	unlock_kernel();
+	if (ret != -ENOIOCTLCMD)
+		return ret;
+
+	return blkdev_driver_ioctl(inode, file, disk, cmd, arg);
 }
 
 /* Most of the generic ioctls are handled in the normal fallback path.
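
The net effect of this refactoring: generic commands run through blkdev_locked_ioctl() under the BKL, while a driver may now supply a BKL-free unlocked_ioctl method that blkdev_driver_ioctl() prefers over the legacy ->ioctl. A sketch of the driver side, assuming the unlocked_ioctl member this series adds to block_device_operations; the exdisk_* names and command are invented:

	#include <linux/fs.h>
	#include <asm/uaccess.h>

	static long exdisk_unlocked_ioctl(struct file *file, unsigned cmd,
					  unsigned long arg)
	{
		switch (cmd) {
		case EXDISK_GETSTATE:		/* hypothetical command */
			/* runs without the BKL; use the driver's own locking */
			return put_user(0, (int __user *)arg);
		default:
			return -ENOTTY;		/* let the generic path try */
		}
	}

	static struct block_device_operations exdisk_fops = {
		.owner		= THIS_MODULE,
		.unlocked_ioctl	= exdisk_unlocked_ioctl,
	};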
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 81fe3a0c1fe7..fd94ea27d594 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
+#include <linux/blkdev.h>
 
 /*
  * for max sense size
@@ -716,7 +717,7 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
 
-	if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
+	if (unlikely(bqt == NULL || tag >= bqt->max_depth))
 		return NULL;
 
 	return bqt->tag_index[tag];
@@ -774,9 +775,9 @@ EXPORT_SYMBOL(blk_queue_free_tags);
 static int
 init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
 {
-	int bits, i;
 	struct request **tag_index;
 	unsigned long *tag_map;
+	int nr_ulongs;
 
 	if (depth > q->nr_requests * 2) {
 		depth = q->nr_requests * 2;
@@ -788,24 +789,17 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
 	if (!tag_index)
 		goto fail;
 
-	bits = (depth / BLK_TAGS_PER_LONG) + 1;
-	tag_map = kmalloc(bits * sizeof(unsigned long), GFP_ATOMIC);
+	nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
+	tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
 	if (!tag_map)
 		goto fail;
 
 	memset(tag_index, 0, depth * sizeof(struct request *));
-	memset(tag_map, 0, bits * sizeof(unsigned long));
+	memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));
 	tags->max_depth = depth;
-	tags->real_max_depth = bits * BITS_PER_LONG;
 	tags->tag_index = tag_index;
 	tags->tag_map = tag_map;
 
-	/*
-	 * set the upper bits if the depth isn't a multiple of the word size
-	 */
-	for (i = depth; i < bits * BLK_TAGS_PER_LONG; i++)
-		__set_bit(i, tag_map);
-
 	return 0;
 fail:
 	kfree(tag_index);
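
ALIGN(depth, BITS_PER_LONG) rounds the requested depth up to a whole number of words, which both drops the old one-word over-allocation and removes the need to pre-set the padding bits: searches are now bounded by max_depth instead of real_max_depth. A worked example, assuming a 64-bit machine:

	/*
	 * depth = 64:  old: bits = (64 / 64) + 1 = 2 words (128 bits),
	 *                   real_max_depth = 128, bits 64..127 pre-set busy
	 *              new: nr_ulongs = ALIGN(64, 64) / 64 = 1 word, no padding
	 * depth = 70:  old: bits = (70 / 64) + 1 = 2 words, bits 70..127 pre-set
	 *              new: nr_ulongs = ALIGN(70, 64) / 64 = 2 words; the
	 *                   search is simply limited to the first 70 bits
	 */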
@@ -870,32 +864,24 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth)
 	struct blk_queue_tag *bqt = q->queue_tags;
 	struct request **tag_index;
 	unsigned long *tag_map;
-	int bits, max_depth;
+	int max_depth, nr_ulongs;
 
 	if (!bqt)
 		return -ENXIO;
 
 	/*
-	 * don't bother sizing down
-	 */
-	if (new_depth <= bqt->real_max_depth) {
-		bqt->max_depth = new_depth;
-		return 0;
-	}
-
-	/*
 	 * save the old state info, so we can copy it back
 	 */
 	tag_index = bqt->tag_index;
 	tag_map = bqt->tag_map;
-	max_depth = bqt->real_max_depth;
+	max_depth = bqt->max_depth;
 
 	if (init_tag_map(q, bqt, new_depth))
 		return -ENOMEM;
 
 	memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
-	bits = max_depth / BLK_TAGS_PER_LONG;
-	memcpy(bqt->tag_map, tag_map, bits * sizeof(unsigned long));
+	nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
+	memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));
 
 	kfree(tag_index);
 	kfree(tag_map);
@@ -925,11 +911,16 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
 
 	BUG_ON(tag == -1);
 
-	if (unlikely(tag >= bqt->real_max_depth))
+	if (unlikely(tag >= bqt->max_depth))
+		/*
+		 * This can happen after tag depth has been reduced.
+		 * FIXME: how about a warning or info message here?
+		 */
 		return;
 
 	if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
-		printk("attempt to clear non-busy tag (%d)\n", tag);
+		printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
+		       __FUNCTION__, tag);
 		return;
 	}
 
935 926
@@ -938,7 +929,8 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
 	rq->tag = -1;
 
 	if (unlikely(bqt->tag_index[tag] == NULL))
-		printk("tag %d is missing\n", tag);
+		printk(KERN_ERR "%s: tag %d is missing\n",
+		       __FUNCTION__, tag);
 
 	bqt->tag_index[tag] = NULL;
 	bqt->busy--;
@@ -967,24 +959,20 @@ EXPORT_SYMBOL(blk_queue_end_tag);
 int blk_queue_start_tag(request_queue_t *q, struct request *rq)
 {
 	struct blk_queue_tag *bqt = q->queue_tags;
-	unsigned long *map = bqt->tag_map;
-	int tag = 0;
+	int tag;
 
 	if (unlikely((rq->flags & REQ_QUEUED))) {
 		printk(KERN_ERR
-		       "request %p for device [%s] already tagged %d",
-		       rq, rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
+		       "%s: request %p for device [%s] already tagged %d",
+		       __FUNCTION__, rq,
+		       rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
 		BUG();
 	}
 
-	for (map = bqt->tag_map; *map == -1UL; map++) {
-		tag += BLK_TAGS_PER_LONG;
-
-		if (tag >= bqt->max_depth)
-			return 1;
-	}
+	tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
+	if (tag >= bqt->max_depth)
+		return 1;
 
-	tag += ffz(*map);
 	__set_bit(tag, bqt->tag_map);
 
 	rq->flags |= REQ_QUEUED;
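
find_first_zero_bit(addr, size) takes its limit in bits rather than words, so it replaces the open-coded word scan plus ffz() exactly, and the max_depth bound is what makes the old padding bits unnecessary. The allocation idiom in isolation, as a sketch; in ll_rw_blk.c this runs under the queue lock, which is why the non-atomic __set_bit() is safe:

	#include <linux/bitops.h>

	/* tag_map/max_depth mirror the fields of struct blk_queue_tag */
	static int example_get_tag(unsigned long *tag_map, int max_depth)
	{
		int tag = find_first_zero_bit(tag_map, max_depth);

		if (tag >= max_depth)
			return -1;		/* all tags busy */

		__set_bit(tag, tag_map);	/* caller holds the queue lock */
		return tag;
	}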
@@ -1020,7 +1008,8 @@ void blk_queue_invalidate_tags(request_queue_t *q)
 		rq = list_entry_rq(tmp);
 
 		if (rq->tag == -1) {
-			printk("bad tag found on list\n");
+			printk(KERN_ERR
+			       "%s: bad tag found on list\n", __FUNCTION__);
 			list_del_init(&rq->queuelist);
 			rq->flags &= ~REQ_QUEUED;
 		} else
@@ -1450,7 +1439,7 @@ EXPORT_SYMBOL(blk_remove_plug);
  */
 void __generic_unplug_device(request_queue_t *q)
 {
-	if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
+	if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)))
 		return;
 
 	if (!blk_remove_plug(q))
@@ -1645,7 +1634,8 @@ static int blk_init_free_list(request_queue_t *q)
 	init_waitqueue_head(&rl->wait[WRITE]);
 	init_waitqueue_head(&rl->drain);
 
-	rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep);
+	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
+				mempool_free_slab, request_cachep, q->node);
 
 	if (!rl->rq_pool)
 		return -ENOMEM;
@@ -1657,8 +1647,15 @@ static int __make_request(request_queue_t *, struct bio *);
 
 request_queue_t *blk_alloc_queue(int gfp_mask)
 {
-	request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask);
+	return blk_alloc_queue_node(gfp_mask, -1);
+}
+EXPORT_SYMBOL(blk_alloc_queue);
+
+request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id)
+{
+	request_queue_t *q;
 
+	q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id);
 	if (!q)
 		return NULL;
 
@@ -1671,8 +1668,7 @@ request_queue_t *blk_alloc_queue(int gfp_mask)
 
 	return q;
 }
-
-EXPORT_SYMBOL(blk_alloc_queue);
+EXPORT_SYMBOL(blk_alloc_queue_node);
 
 /**
  * blk_init_queue - prepare a request queue for use with a block device
@@ -1705,13 +1701,22 @@ EXPORT_SYMBOL(blk_alloc_queue);
  * blk_init_queue() must be paired with a blk_cleanup_queue() call
  * when the block device is deactivated (such as at module unload).
  **/
+
 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
 {
-	request_queue_t *q = blk_alloc_queue(GFP_KERNEL);
+	return blk_init_queue_node(rfn, lock, -1);
+}
+EXPORT_SYMBOL(blk_init_queue);
+
+request_queue_t *
+blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
+{
+	request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
 
 	if (!q)
 		return NULL;
 
+	q->node = node_id;
 	if (blk_init_free_list(q))
 		goto out_init;
 
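
blk_init_queue() keeps its old behaviour by forwarding with node_id == -1, while a NUMA-aware driver can pass the node its controller sits on so the queue, its request mempool (blk_init_free_list() above) and the elevator data all end up local. A hypothetical caller; my_request_fn and my_queue_lock are invented, and how the driver learns node is outside this patch:

	request_queue_t *q;

	/* node: the device's home NUMA node, determined by the driver */
	q = blk_init_queue_node(my_request_fn, &my_queue_lock, node);
	if (!q)
		return -ENOMEM;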
@@ -1754,12 +1759,11 @@ out_init:
 	kmem_cache_free(requestq_cachep, q);
 	return NULL;
 }
-
-EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_init_queue_node);
 
 int blk_get_queue(request_queue_t *q)
 {
-	if (!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
 		atomic_inc(&q->refcnt);
 		return 0;
 	}
@@ -1838,7 +1842,6 @@ static void __freed_request(request_queue_t *q, int rw)
 	clear_queue_congested(q, rw);
 
 	if (rl->count[rw] + 1 <= q->nr_requests) {
-		smp_mb();
 		if (waitqueue_active(&rl->wait[rw]))
 			wake_up(&rl->wait[rw]);
 
@@ -1966,7 +1969,6 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 	DEFINE_WAIT(wait);
 	struct request *rq;
 
-	generic_unplug_device(q);
 	do {
 		struct request_list *rl = &q->rq;
 
@@ -1978,6 +1980,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 		if (!rq) {
 			struct io_context *ioc;
 
+			generic_unplug_device(q);
 			io_schedule();
 
 			/*
@@ -2581,7 +2584,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	spin_lock_prefetch(q->queue_lock);
 
 	barrier = bio_barrier(bio);
-	if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -2682,7 +2685,7 @@ get_rq:
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
-	if (barrier)
+	if (unlikely(barrier))
 		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	req->errors = 0;
@@ -2806,7 +2809,7 @@ static inline void block_wait_queue_running(request_queue_t *q)
 {
 	DEFINE_WAIT(wait);
 
-	while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
+	while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
 		struct request_list *rl = &q->rq;
 
 		prepare_to_wait_exclusive(&rl->drain, &wait,
@@ -2915,7 +2918,7 @@ end_io:
 		goto end_io;
 	}
 
-	if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))
+	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 		goto end_io;
 
 	block_wait_queue_running(q);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6f011d0d8e97..b35e08876dd4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -472,17 +472,11 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&lo->lo_lock, flags);
 	if (lo->lo_biotail) {
 		lo->lo_biotail->bi_next = bio;
 		lo->lo_biotail = bio;
 	} else
 		lo->lo_bio = lo->lo_biotail = bio;
-	spin_unlock_irqrestore(&lo->lo_lock, flags);
-
-	up(&lo->lo_bh_mutex);
 }
 
 /*
@@ -492,14 +486,12 @@ static struct bio *loop_get_bio(struct loop_device *lo)
 {
 	struct bio *bio;
 
-	spin_lock_irq(&lo->lo_lock);
 	if ((bio = lo->lo_bio)) {
 		if (bio == lo->lo_biotail)
 			lo->lo_biotail = NULL;
 		lo->lo_bio = bio->bi_next;
 		bio->bi_next = NULL;
 	}
-	spin_unlock_irq(&lo->lo_lock);
 
 	return bio;
 }
@@ -509,35 +501,28 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
 	struct loop_device *lo = q->queuedata;
 	int rw = bio_rw(old_bio);
 
-	if (!lo)
-		goto out;
+	if (rw == READA)
+		rw = READ;
+
+	BUG_ON(!lo || (rw != READ && rw != WRITE));
 
 	spin_lock_irq(&lo->lo_lock);
 	if (lo->lo_state != Lo_bound)
-		goto inactive;
-	atomic_inc(&lo->lo_pending);
-	spin_unlock_irq(&lo->lo_lock);
-
-	if (rw == WRITE) {
-		if (lo->lo_flags & LO_FLAGS_READ_ONLY)
-			goto err;
-	} else if (rw == READA) {
-		rw = READ;
-	} else if (rw != READ) {
-		printk(KERN_ERR "loop: unknown command (%x)\n", rw);
-		goto err;
-	}
+		goto out;
+	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
+		goto out;
+	lo->lo_pending++;
 	loop_add_bio(lo, old_bio);
+	spin_unlock_irq(&lo->lo_lock);
+	up(&lo->lo_bh_mutex);
 	return 0;
-err:
-	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
+
 out:
+	if (lo->lo_pending == 0)
+		up(&lo->lo_bh_mutex);
+	spin_unlock_irq(&lo->lo_lock);
 	bio_io_error(old_bio, old_bio->bi_size);
 	return 0;
-inactive:
-	spin_unlock_irq(&lo->lo_lock);
-	goto out;
 }
 
 /*
@@ -560,13 +545,11 @@ static void do_loop_switch(struct loop_device *, struct switch_request *);
 
 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 {
-	int ret;
-
 	if (unlikely(!bio->bi_bdev)) {
 		do_loop_switch(lo, bio->bi_private);
 		bio_put(bio);
 	} else {
-		ret = do_bio_filebacked(lo, bio);
+		int ret = do_bio_filebacked(lo, bio);
 		bio_endio(bio, bio->bi_size, ret);
 	}
 }
@@ -594,7 +577,7 @@ static int loop_thread(void *data)
 	set_user_nice(current, -20);
 
 	lo->lo_state = Lo_bound;
-	atomic_inc(&lo->lo_pending);
+	lo->lo_pending = 1;
 
 	/*
 	 * up sem, we are running
@@ -602,26 +585,37 @@ static int loop_thread(void *data)
 	up(&lo->lo_sem);
 
 	for (;;) {
-		down_interruptible(&lo->lo_bh_mutex);
+		int pending;
+
 		/*
-		 * could be upped because of tear-down, not because of
-		 * pending work
+		 * interruptible just to not contribute to load avg
 		 */
-		if (!atomic_read(&lo->lo_pending))
+		if (down_interruptible(&lo->lo_bh_mutex))
+			continue;
+
+		spin_lock_irq(&lo->lo_lock);
+
+		/*
+		 * could be upped because of tear-down, not pending work
+		 */
+		if (unlikely(!lo->lo_pending)) {
+			spin_unlock_irq(&lo->lo_lock);
 			break;
+		}
 
 		bio = loop_get_bio(lo);
-		if (!bio) {
-			printk("loop: missing bio\n");
-			continue;
-		}
+		lo->lo_pending--;
+		pending = lo->lo_pending;
+		spin_unlock_irq(&lo->lo_lock);
+
+		BUG_ON(!bio);
 		loop_handle_bio(lo, bio);
 
 		/*
 		 * upped both for pending work and tear-down, lo_pending
 		 * will hit zero then
 		 */
-		if (atomic_dec_and_test(&lo->lo_pending))
+		if (unlikely(!pending))
 			break;
 	}
 
@@ -900,7 +894,8 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
-	if (atomic_dec_and_test(&lo->lo_pending))
+	lo->lo_pending--;
+	if (!lo->lo_pending)
 		up(&lo->lo_bh_mutex);
 	spin_unlock_irq(&lo->lo_lock);
 
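
With these loop changes lo_pending can be a plain int: every read-modify-write of it now happens under lo->lo_lock, which is also why loop_add_bio()/loop_get_bio() lost their own locking (their only callers already hold the lock). Note the ordering in the submit path, condensed below: the semaphore is upped only after the lock is dropped, so the woken thread does not immediately block on lo_lock.

	/* producer side (loop_make_request), condensed */
	spin_lock_irq(&lo->lo_lock);
	lo->lo_pending++;
	loop_add_bio(lo, bio);		/* list ops; the caller holds the lock */
	spin_unlock_irq(&lo->lo_lock);
	up(&lo->lo_bh_mutex);		/* wake loop_thread after unlocking */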
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index bc56770bcc90..7f3d78de265c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -467,14 +467,12 @@ static int pkt_set_speed(struct pktcdvd_device *pd, unsigned write_speed, unsigned read_speed)
  * Queue a bio for processing by the low-level CD device. Must be called
  * from process context.
  */
-static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio, int high_prio_read)
+static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
 {
 	spin_lock(&pd->iosched.lock);
 	if (bio_data_dir(bio) == READ) {
 		pkt_add_list_last(bio, &pd->iosched.read_queue,
 				  &pd->iosched.read_queue_tail);
-		if (high_prio_read)
-			pd->iosched.high_prio_read = 1;
 	} else {
 		pkt_add_list_last(bio, &pd->iosched.write_queue,
 				  &pd->iosched.write_queue_tail);
@@ -490,15 +488,16 @@ static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio, int high_prio_read)
  * requirements for CDRW drives:
  * - A cache flush command must be inserted before a read request if the
  *   previous request was a write.
- * - Switching between reading and writing is slow, so don't it more often
+ * - Switching between reading and writing is slow, so don't do it more often
  *   than necessary.
+ * - Optimize for throughput at the expense of latency. This means that streaming
+ *   writes will never be interrupted by a read, but if the drive has to seek
+ *   before the next write, switch to reading instead if there are any pending
+ *   read requests.
  * - Set the read speed according to current usage pattern. When only reading
  *   from the device, it's best to use the highest possible read speed, but
  *   when switching often between reading and writing, it's better to have the
  *   same read and write speeds.
- * - Reads originating from user space should have higher priority than reads
- *   originating from pkt_gather_data, because some process is usually waiting
- *   on reads of the first kind.
  */
 static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 {
@@ -512,21 +511,24 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 
 	for (;;) {
 		struct bio *bio;
-		int reads_queued, writes_queued, high_prio_read;
+		int reads_queued, writes_queued;
 
 		spin_lock(&pd->iosched.lock);
 		reads_queued = (pd->iosched.read_queue != NULL);
 		writes_queued = (pd->iosched.write_queue != NULL);
-		if (!reads_queued)
-			pd->iosched.high_prio_read = 0;
-		high_prio_read = pd->iosched.high_prio_read;
 		spin_unlock(&pd->iosched.lock);
 
 		if (!reads_queued && !writes_queued)
 			break;
 
 		if (pd->iosched.writing) {
-			if (high_prio_read || (!writes_queued && reads_queued)) {
+			int need_write_seek = 1;
+			spin_lock(&pd->iosched.lock);
+			bio = pd->iosched.write_queue;
+			spin_unlock(&pd->iosched.lock);
+			if (bio && (bio->bi_sector == pd->iosched.last_write))
+				need_write_seek = 0;
+			if (need_write_seek && reads_queued) {
 				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
 					VPRINTK("pktcdvd: write, waiting\n");
 					break;
@@ -559,8 +561,10 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 
 		if (bio_data_dir(bio) == READ)
 			pd->iosched.successive_reads += bio->bi_size >> 10;
-		else
+		else {
 			pd->iosched.successive_reads = 0;
+			pd->iosched.last_write = bio->bi_sector + bio_sectors(bio);
+		}
 		if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
 			if (pd->read_speed == pd->write_speed) {
 				pd->read_speed = MAX_SPEED;
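
Rather than tracking a high_prio_read flag, the scheduler now records where the last write ended and breaks a write stream for reads only when the next queued write would force a seek anyway. A worked example (sector numbers invented):

	/*
	 * write bio submitted: bi_sector = 1000, bio_sectors() = 64
	 *   -> pd->iosched.last_write = 1064
	 * next queued write at 1064 -> sequential: keep streaming writes,
	 *                              reads continue to wait
	 * next queued write at 5000 -> the head must seek anyway, so any
	 *                              queued reads are serviced first
	 */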
@@ -765,7 +769,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 
 		atomic_inc(&pkt->io_wait);
 		bio->bi_rw = READ;
-		pkt_queue_bio(pd, bio, 0);
+		pkt_queue_bio(pd, bio);
 		frames_read++;
 	}
 
@@ -1062,7 +1066,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 
 	atomic_set(&pkt->io_wait, 1);
 	pkt->w_bio->bi_rw = WRITE;
-	pkt_queue_bio(pd, pkt->w_bio, 0);
+	pkt_queue_bio(pd, pkt->w_bio);
 }
 
 static void pkt_finish_packet(struct packet_data *pkt, int uptodate)
@@ -2120,7 +2124,7 @@ static int pkt_make_request(request_queue_t *q, struct bio *bio)
 		cloned_bio->bi_private = psd;
 		cloned_bio->bi_end_io = pkt_end_io_read_cloned;
 		pd->stats.secs_r += bio->bi_size >> 9;
-		pkt_queue_bio(pd, cloned_bio, 1);
+		pkt_queue_bio(pd, cloned_bio);
 		return 0;
 	}
 