aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2006-12-08 05:36:16 -0500
committer: Linus Torvalds <torvalds@woody.osdl.org> 2006-12-08 11:28:39 -0500
commit: 37be41241f43109c55d92cdbf303399eea642f14 (patch)
tree: 6bc6b0367103d08bdd812cd5c0047f952c839829
parent: fd27c7a1bfa9801544ca961cdb4ca0b4998580b8 (diff)
[PATCH] lockdep: simplify some aspects of bd_mutex nesting
When we open (actually blkdev_get) a partition we need to also open (get) the whole device that holds the partition. This involves some limited recursion. This patch tries to simplify some aspects of this. As well as opening the whole device, we need to increment ->bd_part_count when a partition is opened (this is used by rescan_partitions to avoid a rescan if any partition is active, as that would be confusing). The main change this patch makes is to move the inc/dec of bd_part_count into blkdev_{get,put} for the whole device rather than doing it in blkdev_{get,put} for the partition. More specifically, we introduce __blkdev_get and __blkdev_put which do exactly what blkdev_{get,put} did, only with an extra "for_part" argument (blkdev_{get,put} then call the __ version with a '0' for the extra argument). If for_part is 1, then the blkdev is being get(put) because a partition is being opened(closed) for the first(last) time, and so bd_part_count should be updated (on success). The particular advantage of pushing this function down is that the bd_mutex lock (which is needed to update bd_part_count) is already held at the lower level. Note that this slightly changes the semantics of bd_part_count. Instead of updating it whenever a partition is opened or released, it is now only updated on the first open or last release. This is an adequate semantic as it is only ever tested for "== 0". Having introduced these functions we remove the current bd_part_count updates from do_open (which is really the body of blkdev_get) and call __blkdev_get(..., 1). Similarly in blkdev_put we remove the old bd_part_count updates and call __blkdev_put(..., 1). This call is moved to the end of __blkdev_put to avoid nested locks of bd_mutex. Finally the mutex_lock on whole->bd_mutex in do_open can be removed. It was only really needed to protect bd_part_count, and that is now managed (and protected) within the recursive call.
The observation that bd_part_count is central to the locking issues, and the modifications to create __blkdev_put are from Peter Zijlstra. Cc: Ingo Molnar <mingo@elte.hu> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/block_dev.c 51
1 files changed, 29 insertions, 22 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 36c38f48a4ed..19f5f153ddb8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -900,7 +900,10 @@ void bd_set_size(struct block_device *bdev, loff_t size)
900} 900}
901EXPORT_SYMBOL(bd_set_size); 901EXPORT_SYMBOL(bd_set_size);
902 902
903static int do_open(struct block_device *bdev, struct file *file) 903static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags,
904 int for_part);
905
906static int do_open(struct block_device *bdev, struct file *file, int for_part)
904{ 907{
905 struct module *owner = NULL; 908 struct module *owner = NULL;
906 struct gendisk *disk; 909 struct gendisk *disk;
@@ -944,25 +947,21 @@ static int do_open(struct block_device *bdev, struct file *file)
944 ret = -ENOMEM; 947 ret = -ENOMEM;
945 if (!whole) 948 if (!whole)
946 goto out_first; 949 goto out_first;
947 ret = blkdev_get(whole, file->f_mode, file->f_flags); 950 BUG_ON(for_part);
951 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
948 if (ret) 952 if (ret)
949 goto out_first; 953 goto out_first;
950 bdev->bd_contains = whole; 954 bdev->bd_contains = whole;
951 mutex_lock(&whole->bd_mutex);
952 whole->bd_part_count++;
953 p = disk->part[part - 1]; 955 p = disk->part[part - 1];
954 bdev->bd_inode->i_data.backing_dev_info = 956 bdev->bd_inode->i_data.backing_dev_info =
955 whole->bd_inode->i_data.backing_dev_info; 957 whole->bd_inode->i_data.backing_dev_info;
956 if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { 958 if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
957 whole->bd_part_count--;
958 mutex_unlock(&whole->bd_mutex);
959 ret = -ENXIO; 959 ret = -ENXIO;
960 goto out_first; 960 goto out_first;
961 } 961 }
962 kobject_get(&p->kobj); 962 kobject_get(&p->kobj);
963 bdev->bd_part = p; 963 bdev->bd_part = p;
964 bd_set_size(bdev, (loff_t) p->nr_sects << 9); 964 bd_set_size(bdev, (loff_t) p->nr_sects << 9);
965 mutex_unlock(&whole->bd_mutex);
966 } 965 }
967 } else { 966 } else {
968 put_disk(disk); 967 put_disk(disk);
@@ -975,13 +974,11 @@ static int do_open(struct block_device *bdev, struct file *file)
975 } 974 }
976 if (bdev->bd_invalidated) 975 if (bdev->bd_invalidated)
977 rescan_partitions(bdev->bd_disk, bdev); 976 rescan_partitions(bdev->bd_disk, bdev);
978 } else {
979 mutex_lock(&bdev->bd_contains->bd_mutex);
980 bdev->bd_contains->bd_part_count++;
981 mutex_unlock(&bdev->bd_contains->bd_mutex);
982 } 977 }
983 } 978 }
984 bdev->bd_openers++; 979 bdev->bd_openers++;
980 if (for_part)
981 bdev->bd_part_count++;
985 mutex_unlock(&bdev->bd_mutex); 982 mutex_unlock(&bdev->bd_mutex);
986 unlock_kernel(); 983 unlock_kernel();
987 return 0; 984 return 0;
@@ -1002,7 +999,8 @@ out:
1002 return ret; 999 return ret;
1003} 1000}
1004 1001
1005int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) 1002static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags,
1003 int for_part)
1006{ 1004{
1007 /* 1005 /*
1008 * This crockload is due to bad choice of ->open() type. 1006 * This crockload is due to bad choice of ->open() type.
@@ -1017,9 +1015,13 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
1017 fake_file.f_dentry = &fake_dentry; 1015 fake_file.f_dentry = &fake_dentry;
1018 fake_dentry.d_inode = bdev->bd_inode; 1016 fake_dentry.d_inode = bdev->bd_inode;
1019 1017
1020 return do_open(bdev, &fake_file); 1018 return do_open(bdev, &fake_file, for_part);
1021} 1019}
1022 1020
1021int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
1022{
1023 return __blkdev_get(bdev, mode, flags, 0);
1024}
1023EXPORT_SYMBOL(blkdev_get); 1025EXPORT_SYMBOL(blkdev_get);
1024 1026
1025static int blkdev_open(struct inode * inode, struct file * filp) 1027static int blkdev_open(struct inode * inode, struct file * filp)
@@ -1039,7 +1041,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1039 if (bdev == NULL) 1041 if (bdev == NULL)
1040 return -ENOMEM; 1042 return -ENOMEM;
1041 1043
1042 res = do_open(bdev, filp); 1044 res = do_open(bdev, filp, 0);
1043 if (res) 1045 if (res)
1044 return res; 1046 return res;
1045 1047
@@ -1053,14 +1055,18 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1053 return res; 1055 return res;
1054} 1056}
1055 1057
1056int blkdev_put(struct block_device *bdev) 1058static int __blkdev_put(struct block_device *bdev, int for_part)
1057{ 1059{
1058 int ret = 0; 1060 int ret = 0;
1059 struct inode *bd_inode = bdev->bd_inode; 1061 struct inode *bd_inode = bdev->bd_inode;
1060 struct gendisk *disk = bdev->bd_disk; 1062 struct gendisk *disk = bdev->bd_disk;
1063 struct block_device *victim = NULL;
1061 1064
1062 mutex_lock(&bdev->bd_mutex); 1065 mutex_lock(&bdev->bd_mutex);
1063 lock_kernel(); 1066 lock_kernel();
1067 if (for_part)
1068 bdev->bd_part_count--;
1069
1064 if (!--bdev->bd_openers) { 1070 if (!--bdev->bd_openers) {
1065 sync_blockdev(bdev); 1071 sync_blockdev(bdev);
1066 kill_bdev(bdev); 1072 kill_bdev(bdev);
@@ -1068,10 +1074,6 @@ int blkdev_put(struct block_device *bdev)
1068 if (bdev->bd_contains == bdev) { 1074 if (bdev->bd_contains == bdev) {
1069 if (disk->fops->release) 1075 if (disk->fops->release)
1070 ret = disk->fops->release(bd_inode, NULL); 1076 ret = disk->fops->release(bd_inode, NULL);
1071 } else {
1072 mutex_lock(&bdev->bd_contains->bd_mutex);
1073 bdev->bd_contains->bd_part_count--;
1074 mutex_unlock(&bdev->bd_contains->bd_mutex);
1075 } 1077 }
1076 if (!bdev->bd_openers) { 1078 if (!bdev->bd_openers) {
1077 struct module *owner = disk->fops->owner; 1079 struct module *owner = disk->fops->owner;
@@ -1085,17 +1087,22 @@ int blkdev_put(struct block_device *bdev)
1085 } 1087 }
1086 bdev->bd_disk = NULL; 1088 bdev->bd_disk = NULL;
1087 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1089 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1088 if (bdev != bdev->bd_contains) { 1090 if (bdev != bdev->bd_contains)
1089 blkdev_put(bdev->bd_contains); 1091 victim = bdev->bd_contains;
1090 }
1091 bdev->bd_contains = NULL; 1092 bdev->bd_contains = NULL;
1092 } 1093 }
1093 unlock_kernel(); 1094 unlock_kernel();
1094 mutex_unlock(&bdev->bd_mutex); 1095 mutex_unlock(&bdev->bd_mutex);
1095 bdput(bdev); 1096 bdput(bdev);
1097 if (victim)
1098 __blkdev_put(victim, 1);
1096 return ret; 1099 return ret;
1097} 1100}
1098 1101
1102int blkdev_put(struct block_device *bdev)
1103{
1104 return __blkdev_put(bdev, 0);
1105}
1099EXPORT_SYMBOL(blkdev_put); 1106EXPORT_SYMBOL(blkdev_put);
1100 1107
1101static int blkdev_close(struct inode * inode, struct file * filp) 1108static int blkdev_close(struct inode * inode, struct file * filp)