aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-06-10 15:17:02 -0400
committerChris Mason <chris.mason@oracle.com>2009-06-10 15:17:02 -0400
commite5e9a5206a171b2c467e494aebcdcf70c47289bc (patch)
treed9a6a89cccbd084db923f1661b52d5a1dfdb83fe /fs
parent7df336ec1266dccbb253bac52c529d3dcc7c22d0 (diff)
Btrfs: avoid races between super writeout and device list updates
On multi-device filesystems, btrfs writes supers to all of the devices before considering a sync complete. There wasn't any additional locking between super writeout and the device list management code because device management was done inside a transaction and super writeout only happened with no transaction writers running. With the btrfs fsync log and other async transaction updates, this has been racy for some time. This adds a mutex to protect the device list. The existing volume mutex could not be reused due to transaction lock ordering requirements. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/volumes.c34
-rw-r--r--fs/btrfs/volumes.h7
3 files changed, 45 insertions, 2 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6c54c210dfd0..b7ddc77fa568 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2111,7 +2111,7 @@ static int write_dev_supers(struct btrfs_device *device,
2111 2111
2112int write_all_supers(struct btrfs_root *root, int max_mirrors) 2112int write_all_supers(struct btrfs_root *root, int max_mirrors)
2113{ 2113{
2114 struct list_head *head = &root->fs_info->fs_devices->devices; 2114 struct list_head *head;
2115 struct btrfs_device *dev; 2115 struct btrfs_device *dev;
2116 struct btrfs_super_block *sb; 2116 struct btrfs_super_block *sb;
2117 struct btrfs_dev_item *dev_item; 2117 struct btrfs_dev_item *dev_item;
@@ -2126,6 +2126,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2126 2126
2127 sb = &root->fs_info->super_for_commit; 2127 sb = &root->fs_info->super_for_commit;
2128 dev_item = &sb->dev_item; 2128 dev_item = &sb->dev_item;
2129
2130 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2131 head = &root->fs_info->fs_devices->devices;
2129 list_for_each_entry(dev, head, dev_list) { 2132 list_for_each_entry(dev, head, dev_list) {
2130 if (!dev->bdev) { 2133 if (!dev->bdev) {
2131 total_errors++; 2134 total_errors++;
@@ -2169,6 +2172,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2169 if (ret) 2172 if (ret)
2170 total_errors++; 2173 total_errors++;
2171 } 2174 }
2175 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2172 if (total_errors > max_errors) { 2176 if (total_errors > max_errors) {
2173 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2177 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2174 total_errors); 2178 total_errors);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3f4a5932eac9..3ab80e9cd767 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -377,6 +377,7 @@ static noinline int device_list_add(const char *path,
377 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); 377 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
378 fs_devices->latest_devid = devid; 378 fs_devices->latest_devid = devid;
379 fs_devices->latest_trans = found_transid; 379 fs_devices->latest_trans = found_transid;
380 mutex_init(&fs_devices->device_list_mutex);
380 device = NULL; 381 device = NULL;
381 } else { 382 } else {
382 device = __find_device(&fs_devices->devices, devid, 383 device = __find_device(&fs_devices->devices, devid,
@@ -403,7 +404,11 @@ static noinline int device_list_add(const char *path,
403 return -ENOMEM; 404 return -ENOMEM;
404 } 405 }
405 INIT_LIST_HEAD(&device->dev_alloc_list); 406 INIT_LIST_HEAD(&device->dev_alloc_list);
407
408 mutex_lock(&fs_devices->device_list_mutex);
406 list_add(&device->dev_list, &fs_devices->devices); 409 list_add(&device->dev_list, &fs_devices->devices);
410 mutex_unlock(&fs_devices->device_list_mutex);
411
407 device->fs_devices = fs_devices; 412 device->fs_devices = fs_devices;
408 fs_devices->num_devices++; 413 fs_devices->num_devices++;
409 } 414 }
@@ -429,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
429 INIT_LIST_HEAD(&fs_devices->devices); 434 INIT_LIST_HEAD(&fs_devices->devices);
430 INIT_LIST_HEAD(&fs_devices->alloc_list); 435 INIT_LIST_HEAD(&fs_devices->alloc_list);
431 INIT_LIST_HEAD(&fs_devices->list); 436 INIT_LIST_HEAD(&fs_devices->list);
437 mutex_init(&fs_devices->device_list_mutex);
432 fs_devices->latest_devid = orig->latest_devid; 438 fs_devices->latest_devid = orig->latest_devid;
433 fs_devices->latest_trans = orig->latest_trans; 439 fs_devices->latest_trans = orig->latest_trans;
434 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); 440 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
435 441
442 mutex_lock(&orig->device_list_mutex);
436 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 443 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
437 device = kzalloc(sizeof(*device), GFP_NOFS); 444 device = kzalloc(sizeof(*device), GFP_NOFS);
438 if (!device) 445 if (!device)
@@ -454,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
454 device->fs_devices = fs_devices; 461 device->fs_devices = fs_devices;
455 fs_devices->num_devices++; 462 fs_devices->num_devices++;
456 } 463 }
464 mutex_unlock(&orig->device_list_mutex);
457 return fs_devices; 465 return fs_devices;
458error: 466error:
467 mutex_unlock(&orig->device_list_mutex);
459 free_fs_devices(fs_devices); 468 free_fs_devices(fs_devices);
460 return ERR_PTR(-ENOMEM); 469 return ERR_PTR(-ENOMEM);
461} 470}
@@ -466,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
466 475
467 mutex_lock(&uuid_mutex); 476 mutex_lock(&uuid_mutex);
468again: 477again:
478 mutex_lock(&fs_devices->device_list_mutex);
469 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 479 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
470 if (device->in_fs_metadata) 480 if (device->in_fs_metadata)
471 continue; 481 continue;
@@ -485,6 +495,7 @@ again:
485 kfree(device->name); 495 kfree(device->name);
486 kfree(device); 496 kfree(device);
487 } 497 }
498 mutex_unlock(&fs_devices->device_list_mutex);
488 499
489 if (fs_devices->seed) { 500 if (fs_devices->seed) {
490 fs_devices = fs_devices->seed; 501 fs_devices = fs_devices->seed;
@@ -1135,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1135 1146
1136 device = NULL; 1147 device = NULL;
1137 devices = &root->fs_info->fs_devices->devices; 1148 devices = &root->fs_info->fs_devices->devices;
1149 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1138 list_for_each_entry(tmp, devices, dev_list) { 1150 list_for_each_entry(tmp, devices, dev_list) {
1139 if (tmp->in_fs_metadata && !tmp->bdev) { 1151 if (tmp->in_fs_metadata && !tmp->bdev) {
1140 device = tmp; 1152 device = tmp;
1141 break; 1153 break;
1142 } 1154 }
1143 } 1155 }
1156 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1144 bdev = NULL; 1157 bdev = NULL;
1145 bh = NULL; 1158 bh = NULL;
1146 disk_super = NULL; 1159 disk_super = NULL;
@@ -1195,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1195 goto error_brelse; 1208 goto error_brelse;
1196 1209
1197 device->in_fs_metadata = 0; 1210 device->in_fs_metadata = 0;
1211
1212 /*
1213 * the device list mutex makes sure that we don't change
1214 * the device list while someone else is writing out all
1215 * the device supers.
1216 */
1217 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1198 list_del_init(&device->dev_list); 1218 list_del_init(&device->dev_list);
1219 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1220
1199 device->fs_devices->num_devices--; 1221 device->fs_devices->num_devices--;
1200 1222
1201 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1223 next_device = list_entry(root->fs_info->fs_devices->devices.next,
@@ -1289,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1289 seed_devices->opened = 1; 1311 seed_devices->opened = 1;
1290 INIT_LIST_HEAD(&seed_devices->devices); 1312 INIT_LIST_HEAD(&seed_devices->devices);
1291 INIT_LIST_HEAD(&seed_devices->alloc_list); 1313 INIT_LIST_HEAD(&seed_devices->alloc_list);
1314 mutex_init(&seed_devices->device_list_mutex);
1292 list_splice_init(&fs_devices->devices, &seed_devices->devices); 1315 list_splice_init(&fs_devices->devices, &seed_devices->devices);
1293 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); 1316 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1294 list_for_each_entry(device, &seed_devices->devices, dev_list) { 1317 list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1414,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1414 mutex_lock(&root->fs_info->volume_mutex); 1437 mutex_lock(&root->fs_info->volume_mutex);
1415 1438
1416 devices = &root->fs_info->fs_devices->devices; 1439 devices = &root->fs_info->fs_devices->devices;
1440 /*
1441 * we have the volume lock, so we don't need the extra
1442 * device list mutex while reading the list here.
1443 */
1417 list_for_each_entry(device, devices, dev_list) { 1444 list_for_each_entry(device, devices, dev_list) {
1418 if (device->bdev == bdev) { 1445 if (device->bdev == bdev) {
1419 ret = -EEXIST; 1446 ret = -EEXIST;
@@ -1468,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1468 } 1495 }
1469 1496
1470 device->fs_devices = root->fs_info->fs_devices; 1497 device->fs_devices = root->fs_info->fs_devices;
1498
1499 /*
1500 * we don't want write_supers to jump in here with our device
1501 * half setup
1502 */
1503 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1471 list_add(&device->dev_list, &root->fs_info->fs_devices->devices); 1504 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
1472 list_add(&device->dev_alloc_list, 1505 list_add(&device->dev_alloc_list,
1473 &root->fs_info->fs_devices->alloc_list); 1506 &root->fs_info->fs_devices->alloc_list);
@@ -1486,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1486 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); 1519 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
1487 btrfs_set_super_num_devices(&root->fs_info->super_copy, 1520 btrfs_set_super_num_devices(&root->fs_info->super_copy,
1488 total_bytes + 1); 1521 total_bytes + 1);
1522 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1489 1523
1490 if (seeding_dev) { 1524 if (seeding_dev) {
1491 ret = init_first_rw_device(trans, root, device); 1525 ret = init_first_rw_device(trans, root, device);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3c1f7310421e..5139a833f721 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -96,7 +96,12 @@ struct btrfs_fs_devices {
96 u64 rw_devices; 96 u64 rw_devices;
97 u64 total_rw_bytes; 97 u64 total_rw_bytes;
98 struct block_device *latest_bdev; 98 struct block_device *latest_bdev;
99 /* all of the devices in the FS */ 99
100 /* all of the devices in the FS, protected by a mutex
101 * so we can safely walk it to write out the supers without
102 * worrying about add/remove by the multi-device code
103 */
104 struct mutex device_list_mutex;
100 struct list_head devices; 105 struct list_head devices;
101 106
102 /* devices not currently being allocated */ 107 /* devices not currently being allocated */