diff options
author | Chris Mason <chris.mason@oracle.com> | 2009-06-10 15:17:02 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-06-10 15:17:02 -0400 |
commit | e5e9a5206a171b2c467e494aebcdcf70c47289bc (patch) | |
tree | d9a6a89cccbd084db923f1661b52d5a1dfdb83fe /fs | |
parent | 7df336ec1266dccbb253bac52c529d3dcc7c22d0 (diff) |
Btrfs: avoid races between super writeout and device list updates
On multi-device filesystems, btrfs writes supers to all of the devices
before considering a sync complete. There wasn't any additional
locking between super writeout and the device list management code
because device management was done inside a transaction and
super writeout only happened with no transaction writers running.
With the btrfs fsync log and other async transaction updates, this
has been racy for some time. This adds a mutex to protect
the device list. The existing volume mutex could not be reused due to
transaction lock ordering requirements.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 34 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 7 |
3 files changed, 45 insertions, 2 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c54c210dfd0..b7ddc77fa568 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -2111,7 +2111,7 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2111 | 2111 | ||
2112 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 2112 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
2113 | { | 2113 | { |
2114 | struct list_head *head = &root->fs_info->fs_devices->devices; | 2114 | struct list_head *head; |
2115 | struct btrfs_device *dev; | 2115 | struct btrfs_device *dev; |
2116 | struct btrfs_super_block *sb; | 2116 | struct btrfs_super_block *sb; |
2117 | struct btrfs_dev_item *dev_item; | 2117 | struct btrfs_dev_item *dev_item; |
@@ -2126,6 +2126,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2126 | 2126 | ||
2127 | sb = &root->fs_info->super_for_commit; | 2127 | sb = &root->fs_info->super_for_commit; |
2128 | dev_item = &sb->dev_item; | 2128 | dev_item = &sb->dev_item; |
2129 | |||
2130 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
2131 | head = &root->fs_info->fs_devices->devices; | ||
2129 | list_for_each_entry(dev, head, dev_list) { | 2132 | list_for_each_entry(dev, head, dev_list) { |
2130 | if (!dev->bdev) { | 2133 | if (!dev->bdev) { |
2131 | total_errors++; | 2134 | total_errors++; |
@@ -2169,6 +2172,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2169 | if (ret) | 2172 | if (ret) |
2170 | total_errors++; | 2173 | total_errors++; |
2171 | } | 2174 | } |
2175 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
2172 | if (total_errors > max_errors) { | 2176 | if (total_errors > max_errors) { |
2173 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", | 2177 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", |
2174 | total_errors); | 2178 | total_errors); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3f4a5932eac9..3ab80e9cd767 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -377,6 +377,7 @@ static noinline int device_list_add(const char *path, | |||
377 | memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); | 377 | memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); |
378 | fs_devices->latest_devid = devid; | 378 | fs_devices->latest_devid = devid; |
379 | fs_devices->latest_trans = found_transid; | 379 | fs_devices->latest_trans = found_transid; |
380 | mutex_init(&fs_devices->device_list_mutex); | ||
380 | device = NULL; | 381 | device = NULL; |
381 | } else { | 382 | } else { |
382 | device = __find_device(&fs_devices->devices, devid, | 383 | device = __find_device(&fs_devices->devices, devid, |
@@ -403,7 +404,11 @@ static noinline int device_list_add(const char *path, | |||
403 | return -ENOMEM; | 404 | return -ENOMEM; |
404 | } | 405 | } |
405 | INIT_LIST_HEAD(&device->dev_alloc_list); | 406 | INIT_LIST_HEAD(&device->dev_alloc_list); |
407 | |||
408 | mutex_lock(&fs_devices->device_list_mutex); | ||
406 | list_add(&device->dev_list, &fs_devices->devices); | 409 | list_add(&device->dev_list, &fs_devices->devices); |
410 | mutex_unlock(&fs_devices->device_list_mutex); | ||
411 | |||
407 | device->fs_devices = fs_devices; | 412 | device->fs_devices = fs_devices; |
408 | fs_devices->num_devices++; | 413 | fs_devices->num_devices++; |
409 | } | 414 | } |
@@ -429,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
429 | INIT_LIST_HEAD(&fs_devices->devices); | 434 | INIT_LIST_HEAD(&fs_devices->devices); |
430 | INIT_LIST_HEAD(&fs_devices->alloc_list); | 435 | INIT_LIST_HEAD(&fs_devices->alloc_list); |
431 | INIT_LIST_HEAD(&fs_devices->list); | 436 | INIT_LIST_HEAD(&fs_devices->list); |
437 | mutex_init(&fs_devices->device_list_mutex); | ||
432 | fs_devices->latest_devid = orig->latest_devid; | 438 | fs_devices->latest_devid = orig->latest_devid; |
433 | fs_devices->latest_trans = orig->latest_trans; | 439 | fs_devices->latest_trans = orig->latest_trans; |
434 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); | 440 | memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); |
435 | 441 | ||
442 | mutex_lock(&orig->device_list_mutex); | ||
436 | list_for_each_entry(orig_dev, &orig->devices, dev_list) { | 443 | list_for_each_entry(orig_dev, &orig->devices, dev_list) { |
437 | device = kzalloc(sizeof(*device), GFP_NOFS); | 444 | device = kzalloc(sizeof(*device), GFP_NOFS); |
438 | if (!device) | 445 | if (!device) |
@@ -454,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
454 | device->fs_devices = fs_devices; | 461 | device->fs_devices = fs_devices; |
455 | fs_devices->num_devices++; | 462 | fs_devices->num_devices++; |
456 | } | 463 | } |
464 | mutex_unlock(&orig->device_list_mutex); | ||
457 | return fs_devices; | 465 | return fs_devices; |
458 | error: | 466 | error: |
467 | mutex_unlock(&orig->device_list_mutex); | ||
459 | free_fs_devices(fs_devices); | 468 | free_fs_devices(fs_devices); |
460 | return ERR_PTR(-ENOMEM); | 469 | return ERR_PTR(-ENOMEM); |
461 | } | 470 | } |
@@ -466,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) | |||
466 | 475 | ||
467 | mutex_lock(&uuid_mutex); | 476 | mutex_lock(&uuid_mutex); |
468 | again: | 477 | again: |
478 | mutex_lock(&fs_devices->device_list_mutex); | ||
469 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 479 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
470 | if (device->in_fs_metadata) | 480 | if (device->in_fs_metadata) |
471 | continue; | 481 | continue; |
@@ -485,6 +495,7 @@ again: | |||
485 | kfree(device->name); | 495 | kfree(device->name); |
486 | kfree(device); | 496 | kfree(device); |
487 | } | 497 | } |
498 | mutex_unlock(&fs_devices->device_list_mutex); | ||
488 | 499 | ||
489 | if (fs_devices->seed) { | 500 | if (fs_devices->seed) { |
490 | fs_devices = fs_devices->seed; | 501 | fs_devices = fs_devices->seed; |
@@ -1135,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1135 | 1146 | ||
1136 | device = NULL; | 1147 | device = NULL; |
1137 | devices = &root->fs_info->fs_devices->devices; | 1148 | devices = &root->fs_info->fs_devices->devices; |
1149 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1138 | list_for_each_entry(tmp, devices, dev_list) { | 1150 | list_for_each_entry(tmp, devices, dev_list) { |
1139 | if (tmp->in_fs_metadata && !tmp->bdev) { | 1151 | if (tmp->in_fs_metadata && !tmp->bdev) { |
1140 | device = tmp; | 1152 | device = tmp; |
1141 | break; | 1153 | break; |
1142 | } | 1154 | } |
1143 | } | 1155 | } |
1156 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1144 | bdev = NULL; | 1157 | bdev = NULL; |
1145 | bh = NULL; | 1158 | bh = NULL; |
1146 | disk_super = NULL; | 1159 | disk_super = NULL; |
@@ -1195,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1195 | goto error_brelse; | 1208 | goto error_brelse; |
1196 | 1209 | ||
1197 | device->in_fs_metadata = 0; | 1210 | device->in_fs_metadata = 0; |
1211 | |||
1212 | /* | ||
1213 | * the device list mutex makes sure that we don't change | ||
1214 | * the device list while someone else is writing out all | ||
1215 | * the device supers. | ||
1216 | */ | ||
1217 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1198 | list_del_init(&device->dev_list); | 1218 | list_del_init(&device->dev_list); |
1219 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1220 | |||
1199 | device->fs_devices->num_devices--; | 1221 | device->fs_devices->num_devices--; |
1200 | 1222 | ||
1201 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1223 | next_device = list_entry(root->fs_info->fs_devices->devices.next, |
@@ -1289,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, | |||
1289 | seed_devices->opened = 1; | 1311 | seed_devices->opened = 1; |
1290 | INIT_LIST_HEAD(&seed_devices->devices); | 1312 | INIT_LIST_HEAD(&seed_devices->devices); |
1291 | INIT_LIST_HEAD(&seed_devices->alloc_list); | 1313 | INIT_LIST_HEAD(&seed_devices->alloc_list); |
1314 | mutex_init(&seed_devices->device_list_mutex); | ||
1292 | list_splice_init(&fs_devices->devices, &seed_devices->devices); | 1315 | list_splice_init(&fs_devices->devices, &seed_devices->devices); |
1293 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); | 1316 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); |
1294 | list_for_each_entry(device, &seed_devices->devices, dev_list) { | 1317 | list_for_each_entry(device, &seed_devices->devices, dev_list) { |
@@ -1414,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1414 | mutex_lock(&root->fs_info->volume_mutex); | 1437 | mutex_lock(&root->fs_info->volume_mutex); |
1415 | 1438 | ||
1416 | devices = &root->fs_info->fs_devices->devices; | 1439 | devices = &root->fs_info->fs_devices->devices; |
1440 | /* | ||
1441 | * we have the volume lock, so we don't need the extra | ||
1442 | * device list mutex while reading the list here. | ||
1443 | */ | ||
1417 | list_for_each_entry(device, devices, dev_list) { | 1444 | list_for_each_entry(device, devices, dev_list) { |
1418 | if (device->bdev == bdev) { | 1445 | if (device->bdev == bdev) { |
1419 | ret = -EEXIST; | 1446 | ret = -EEXIST; |
@@ -1468,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1468 | } | 1495 | } |
1469 | 1496 | ||
1470 | device->fs_devices = root->fs_info->fs_devices; | 1497 | device->fs_devices = root->fs_info->fs_devices; |
1498 | |||
1499 | /* | ||
1500 | * we don't want write_supers to jump in here with our device | ||
1501 | * half setup | ||
1502 | */ | ||
1503 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1471 | list_add(&device->dev_list, &root->fs_info->fs_devices->devices); | 1504 | list_add(&device->dev_list, &root->fs_info->fs_devices->devices); |
1472 | list_add(&device->dev_alloc_list, | 1505 | list_add(&device->dev_alloc_list, |
1473 | &root->fs_info->fs_devices->alloc_list); | 1506 | &root->fs_info->fs_devices->alloc_list); |
@@ -1486,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1486 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); | 1519 | total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); |
1487 | btrfs_set_super_num_devices(&root->fs_info->super_copy, | 1520 | btrfs_set_super_num_devices(&root->fs_info->super_copy, |
1488 | total_bytes + 1); | 1521 | total_bytes + 1); |
1522 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1489 | 1523 | ||
1490 | if (seeding_dev) { | 1524 | if (seeding_dev) { |
1491 | ret = init_first_rw_device(trans, root, device); | 1525 | ret = init_first_rw_device(trans, root, device); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3c1f7310421e..5139a833f721 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -96,7 +96,12 @@ struct btrfs_fs_devices { | |||
96 | u64 rw_devices; | 96 | u64 rw_devices; |
97 | u64 total_rw_bytes; | 97 | u64 total_rw_bytes; |
98 | struct block_device *latest_bdev; | 98 | struct block_device *latest_bdev; |
99 | /* all of the devices in the FS */ | 99 | |
100 | /* all of the devices in the FS, protected by a mutex | ||
101 | * so we can safely walk it to write out the supers without | ||
102 | * worrying about add/remove by the multi-device code | ||
103 | */ | ||
104 | struct mutex device_list_mutex; | ||
100 | struct list_head devices; | 105 | struct list_head devices; |
101 | 106 | ||
102 | /* devices not currently being allocated */ | 107 | /* devices not currently being allocated */ |