diff options
-rw-r--r-- | fs/super.c | 251 | ||||
-rw-r--r-- | include/linux/fs.h | 150 |
2 files changed, 373 insertions, 28 deletions
diff --git a/fs/super.c b/fs/super.c index c743fb3be4b8..0f64ecb7b1bf 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -33,12 +33,19 @@ | |||
33 | #include <linux/rculist_bl.h> | 33 | #include <linux/rculist_bl.h> |
34 | #include <linux/cleancache.h> | 34 | #include <linux/cleancache.h> |
35 | #include <linux/fsnotify.h> | 35 | #include <linux/fsnotify.h> |
36 | #include <linux/lockdep.h> | ||
36 | #include "internal.h" | 37 | #include "internal.h" |
37 | 38 | ||
38 | 39 | ||
39 | LIST_HEAD(super_blocks); | 40 | LIST_HEAD(super_blocks); |
40 | DEFINE_SPINLOCK(sb_lock); | 41 | DEFINE_SPINLOCK(sb_lock); |
41 | 42 | ||
43 | static char *sb_writers_name[SB_FREEZE_LEVELS] = { | ||
44 | "sb_writers", | ||
45 | "sb_pagefaults", | ||
46 | "sb_internal", | ||
47 | }; | ||
48 | |||
42 | /* | 49 | /* |
43 | * One thing we have to be careful of with a per-sb shrinker is that we don't | 50 | * One thing we have to be careful of with a per-sb shrinker is that we don't |
44 | * drop the last active reference to the superblock from within the shrinker. | 51 | * drop the last active reference to the superblock from within the shrinker. |
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) | |||
102 | return total_objects; | 109 | return total_objects; |
103 | } | 110 | } |
104 | 111 | ||
112 | static int init_sb_writers(struct super_block *s, struct file_system_type *type) | ||
113 | { | ||
114 | int err; | ||
115 | int i; | ||
116 | |||
117 | for (i = 0; i < SB_FREEZE_LEVELS; i++) { | ||
118 | err = percpu_counter_init(&s->s_writers.counter[i], 0); | ||
119 | if (err < 0) | ||
120 | goto err_out; | ||
121 | lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], | ||
122 | &type->s_writers_key[i], 0); | ||
123 | } | ||
124 | init_waitqueue_head(&s->s_writers.wait); | ||
125 | init_waitqueue_head(&s->s_writers.wait_unfrozen); | ||
126 | return 0; | ||
127 | err_out: | ||
128 | while (--i >= 0) | ||
129 | percpu_counter_destroy(&s->s_writers.counter[i]); | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | static void destroy_sb_writers(struct super_block *s) | ||
134 | { | ||
135 | int i; | ||
136 | |||
137 | for (i = 0; i < SB_FREEZE_LEVELS; i++) | ||
138 | percpu_counter_destroy(&s->s_writers.counter[i]); | ||
139 | } | ||
140 | |||
105 | /** | 141 | /** |
106 | * alloc_super - create new superblock | 142 | * alloc_super - create new superblock |
107 | * @type: filesystem type superblock should belong to | 143 | * @type: filesystem type superblock should belong to |
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
117 | 153 | ||
118 | if (s) { | 154 | if (s) { |
119 | if (security_sb_alloc(s)) { | 155 | if (security_sb_alloc(s)) { |
156 | /* | ||
157 | * We cannot call security_sb_free() without | ||
158 | * security_sb_alloc() succeeding. So bail out manually | ||
159 | */ | ||
120 | kfree(s); | 160 | kfree(s); |
121 | s = NULL; | 161 | s = NULL; |
122 | goto out; | 162 | goto out; |
123 | } | 163 | } |
124 | #ifdef CONFIG_SMP | 164 | #ifdef CONFIG_SMP |
125 | s->s_files = alloc_percpu(struct list_head); | 165 | s->s_files = alloc_percpu(struct list_head); |
126 | if (!s->s_files) { | 166 | if (!s->s_files) |
127 | security_sb_free(s); | 167 | goto err_out; |
128 | kfree(s); | 168 | else { |
129 | s = NULL; | ||
130 | goto out; | ||
131 | } else { | ||
132 | int i; | 169 | int i; |
133 | 170 | ||
134 | for_each_possible_cpu(i) | 171 | for_each_possible_cpu(i) |
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
137 | #else | 174 | #else |
138 | INIT_LIST_HEAD(&s->s_files); | 175 | INIT_LIST_HEAD(&s->s_files); |
139 | #endif | 176 | #endif |
177 | if (init_sb_writers(s, type)) | ||
178 | goto err_out; | ||
140 | s->s_flags = flags; | 179 | s->s_flags = flags; |
141 | s->s_bdi = &default_backing_dev_info; | 180 | s->s_bdi = &default_backing_dev_info; |
142 | INIT_HLIST_NODE(&s->s_instances); | 181 | INIT_HLIST_NODE(&s->s_instances); |
@@ -190,6 +229,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
190 | } | 229 | } |
191 | out: | 230 | out: |
192 | return s; | 231 | return s; |
232 | err_out: | ||
233 | security_sb_free(s); | ||
234 | #ifdef CONFIG_SMP | ||
235 | if (s->s_files) | ||
236 | free_percpu(s->s_files); | ||
237 | #endif | ||
238 | destroy_sb_writers(s); | ||
239 | kfree(s); | ||
240 | s = NULL; | ||
241 | goto out; | ||
193 | } | 242 | } |
194 | 243 | ||
195 | /** | 244 | /** |
@@ -203,6 +252,7 @@ static inline void destroy_super(struct super_block *s) | |||
203 | #ifdef CONFIG_SMP | 252 | #ifdef CONFIG_SMP |
204 | free_percpu(s->s_files); | 253 | free_percpu(s->s_files); |
205 | #endif | 254 | #endif |
255 | destroy_sb_writers(s); | ||
206 | security_sb_free(s); | 256 | security_sb_free(s); |
207 | WARN_ON(!list_empty(&s->s_mounts)); | 257 | WARN_ON(!list_empty(&s->s_mounts)); |
208 | kfree(s->s_subtype); | 258 | kfree(s->s_subtype); |
@@ -651,10 +701,11 @@ struct super_block *get_super_thawed(struct block_device *bdev) | |||
651 | { | 701 | { |
652 | while (1) { | 702 | while (1) { |
653 | struct super_block *s = get_super(bdev); | 703 | struct super_block *s = get_super(bdev); |
654 | if (!s || s->s_frozen == SB_UNFROZEN) | 704 | if (!s || s->s_writers.frozen == SB_UNFROZEN) |
655 | return s; | 705 | return s; |
656 | up_read(&s->s_umount); | 706 | up_read(&s->s_umount); |
657 | vfs_check_frozen(s, SB_FREEZE_WRITE); | 707 | wait_event(s->s_writers.wait_unfrozen, |
708 | s->s_writers.frozen == SB_UNFROZEN); | ||
658 | put_super(s); | 709 | put_super(s); |
659 | } | 710 | } |
660 | } | 711 | } |
@@ -732,7 +783,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
732 | int retval; | 783 | int retval; |
733 | int remount_ro; | 784 | int remount_ro; |
734 | 785 | ||
735 | if (sb->s_frozen != SB_UNFROZEN) | 786 | if (sb->s_writers.frozen != SB_UNFROZEN) |
736 | return -EBUSY; | 787 | return -EBUSY; |
737 | 788 | ||
738 | #ifdef CONFIG_BLOCK | 789 | #ifdef CONFIG_BLOCK |
@@ -1163,6 +1214,120 @@ out: | |||
1163 | return ERR_PTR(error); | 1214 | return ERR_PTR(error); |
1164 | } | 1215 | } |
1165 | 1216 | ||
1217 | /* | ||
1218 | * This is an internal function, please use sb_end_{write,pagefault,intwrite} | ||
1219 | * instead. | ||
1220 | */ | ||
1221 | void __sb_end_write(struct super_block *sb, int level) | ||
1222 | { | ||
1223 | percpu_counter_dec(&sb->s_writers.counter[level-1]); | ||
1224 | /* | ||
1225 | * Make sure s_writers are updated before we wake up waiters in | ||
1226 | * freeze_super(). | ||
1227 | */ | ||
1228 | smp_mb(); | ||
1229 | if (waitqueue_active(&sb->s_writers.wait)) | ||
1230 | wake_up(&sb->s_writers.wait); | ||
1231 | rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_); | ||
1232 | } | ||
1233 | EXPORT_SYMBOL(__sb_end_write); | ||
1234 | |||
1235 | #ifdef CONFIG_LOCKDEP | ||
1236 | /* | ||
1237 | * We want lockdep to tell us about possible deadlocks with freezing but | ||
1238 | * it's a bit tricky to properly instrument it. Getting a freeze protection | ||
1239 | * works as getting a read lock but there are subtle problems. XFS for example | ||
1240 | * gets freeze protection on internal level twice in some cases, which is OK | ||
1241 | * only because we already hold a freeze protection also on higher level. Due | ||
1242 | * to these cases we have to tell lockdep we are doing trylock when we | ||
1243 | * already hold a freeze protection for a higher freeze level. | ||
1244 | */ | ||
1245 | static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock, | ||
1246 | unsigned long ip) | ||
1247 | { | ||
1248 | int i; | ||
1249 | |||
1250 | if (!trylock) { | ||
1251 | for (i = 0; i < level - 1; i++) | ||
1252 | if (lock_is_held(&sb->s_writers.lock_map[i])) { | ||
1253 | trylock = true; | ||
1254 | break; | ||
1255 | } | ||
1256 | } | ||
1257 | rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip); | ||
1258 | } | ||
1259 | #endif | ||
1260 | |||
1261 | /* | ||
1262 | * This is an internal function, please use sb_start_{write,pagefault,intwrite} | ||
1263 | * instead. | ||
1264 | */ | ||
1265 | int __sb_start_write(struct super_block *sb, int level, bool wait) | ||
1266 | { | ||
1267 | retry: | ||
1268 | if (unlikely(sb->s_writers.frozen >= level)) { | ||
1269 | if (!wait) | ||
1270 | return 0; | ||
1271 | wait_event(sb->s_writers.wait_unfrozen, | ||
1272 | sb->s_writers.frozen < level); | ||
1273 | } | ||
1274 | |||
1275 | #ifdef CONFIG_LOCKDEP | ||
1276 | acquire_freeze_lock(sb, level, !wait, _RET_IP_); | ||
1277 | #endif | ||
1278 | percpu_counter_inc(&sb->s_writers.counter[level-1]); | ||
1279 | /* | ||
1280 | * Make sure counter is updated before we check for frozen. | ||
1281 | * freeze_super() first sets frozen and then checks the counter. | ||
1282 | */ | ||
1283 | smp_mb(); | ||
1284 | if (unlikely(sb->s_writers.frozen >= level)) { | ||
1285 | __sb_end_write(sb, level); | ||
1286 | goto retry; | ||
1287 | } | ||
1288 | return 1; | ||
1289 | } | ||
1290 | EXPORT_SYMBOL(__sb_start_write); | ||
1291 | |||
1292 | /** | ||
1293 | * sb_wait_write - wait until all writers to given file system finish | ||
1294 | * @sb: the super for which we wait | ||
1295 | * @level: type of writers we wait for (normal, page fault or fs-internal) | ||
1296 | * | ||
1297 | * This function waits until there are no writers of given type to given file | ||
1298 | * system. Caller of this function should make sure there can be no new writers | ||
1299 | * of type @level before calling this function. Otherwise this function can | ||
1300 | * livelock. | ||
1301 | */ | ||
1302 | static void sb_wait_write(struct super_block *sb, int level) | ||
1303 | { | ||
1304 | s64 writers; | ||
1305 | |||
1306 | /* | ||
1307 | * We just cycle through lockdep here so that it does not complain | ||
1308 | * about returning with the lock held to userspace | ||
1309 | */ | ||
1310 | rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_); | ||
1311 | rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_); | ||
1312 | |||
1313 | do { | ||
1314 | DEFINE_WAIT(wait); | ||
1315 | |||
1316 | /* | ||
1317 | * We use a barrier in prepare_to_wait() to separate setting | ||
1318 | * of frozen and checking of the counter | ||
1319 | */ | ||
1320 | prepare_to_wait(&sb->s_writers.wait, &wait, | ||
1321 | TASK_UNINTERRUPTIBLE); | ||
1322 | |||
1323 | writers = percpu_counter_sum(&sb->s_writers.counter[level-1]); | ||
1324 | if (writers) | ||
1325 | schedule(); | ||
1326 | |||
1327 | finish_wait(&sb->s_writers.wait, &wait); | ||
1328 | } while (writers); | ||
1329 | } | ||
1330 | |||
1166 | /** | 1331 | /** |
1167 | * freeze_super - lock the filesystem and force it into a consistent state | 1332 | * freeze_super - lock the filesystem and force it into a consistent state |
1168 | * @sb: the super to lock | 1333 | * @sb: the super to lock |
@@ -1170,6 +1335,31 @@ out: | |||
1170 | * Syncs the super to make sure the filesystem is consistent and calls the fs's | 1335 | * Syncs the super to make sure the filesystem is consistent and calls the fs's |
1171 | * freeze_fs. Subsequent calls to this without first thawing the fs will return | 1336 | * freeze_fs. Subsequent calls to this without first thawing the fs will return |
1172 | * -EBUSY. | 1337 | * -EBUSY. |
1338 | * | ||
1339 | * During this function, sb->s_writers.frozen goes through these values: | ||
1340 | * | ||
1341 | * SB_UNFROZEN: File system is normal, all writes progress as usual. | ||
1342 | * | ||
1343 | * SB_FREEZE_WRITE: The file system is in the process of being frozen. New | ||
1344 | * writes should be blocked, though page faults are still allowed. We wait for | ||
1345 | * all writes to complete and then proceed to the next stage. | ||
1346 | * | ||
1347 | * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked | ||
1348 | * but internal fs threads can still modify the filesystem (although they | ||
1349 | * should not dirty new pages or inodes), writeback can run etc. After waiting | ||
1350 | * for all running page faults we sync the filesystem which will clean all | ||
1351 | * dirty pages and inodes (no new dirty pages or inodes can be created when | ||
1352 | * sync is running). | ||
1353 | * | ||
1354 | * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs | ||
1355 | * modification are blocked (e.g. XFS preallocation truncation on inode | ||
1356 | * reclaim). This is usually implemented by blocking new transactions for | ||
1357 | * filesystems that have them and need this additional guard. After all | ||
1358 | * internal writers are finished we call ->freeze_fs() to finish filesystem | ||
1359 | * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is | ||
1360 | * mostly auxiliary for filesystems to verify they do not modify frozen fs. | ||
1361 | * | ||
1362 | * sb->s_writers.frozen is protected by sb->s_umount. | ||
1173 | */ | 1363 | */ |
1174 | int freeze_super(struct super_block *sb) | 1364 | int freeze_super(struct super_block *sb) |
1175 | { | 1365 | { |
@@ -1177,7 +1367,7 @@ int freeze_super(struct super_block *sb) | |||
1177 | 1367 | ||
1178 | atomic_inc(&sb->s_active); | 1368 | atomic_inc(&sb->s_active); |
1179 | down_write(&sb->s_umount); | 1369 | down_write(&sb->s_umount); |
1180 | if (sb->s_frozen) { | 1370 | if (sb->s_writers.frozen != SB_UNFROZEN) { |
1181 | deactivate_locked_super(sb); | 1371 | deactivate_locked_super(sb); |
1182 | return -EBUSY; | 1372 | return -EBUSY; |
1183 | } | 1373 | } |
@@ -1188,33 +1378,53 @@ int freeze_super(struct super_block *sb) | |||
1188 | } | 1378 | } |
1189 | 1379 | ||
1190 | if (sb->s_flags & MS_RDONLY) { | 1380 | if (sb->s_flags & MS_RDONLY) { |
1191 | sb->s_frozen = SB_FREEZE_TRANS; | 1381 | /* Nothing to do really... */ |
1192 | smp_wmb(); | 1382 | sb->s_writers.frozen = SB_FREEZE_COMPLETE; |
1193 | up_write(&sb->s_umount); | 1383 | up_write(&sb->s_umount); |
1194 | return 0; | 1384 | return 0; |
1195 | } | 1385 | } |
1196 | 1386 | ||
1197 | sb->s_frozen = SB_FREEZE_WRITE; | 1387 | /* From now on, no new normal writers can start */ |
1388 | sb->s_writers.frozen = SB_FREEZE_WRITE; | ||
1389 | smp_wmb(); | ||
1390 | |||
1391 | /* Release s_umount to preserve sb_start_write -> s_umount ordering */ | ||
1392 | up_write(&sb->s_umount); | ||
1393 | |||
1394 | sb_wait_write(sb, SB_FREEZE_WRITE); | ||
1395 | |||
1396 | /* Now we go and block page faults... */ | ||
1397 | down_write(&sb->s_umount); | ||
1398 | sb->s_writers.frozen = SB_FREEZE_PAGEFAULT; | ||
1198 | smp_wmb(); | 1399 | smp_wmb(); |
1199 | 1400 | ||
1401 | sb_wait_write(sb, SB_FREEZE_PAGEFAULT); | ||
1402 | |||
1403 | /* All writers are done so after syncing there won't be dirty data */ | ||
1200 | sync_filesystem(sb); | 1404 | sync_filesystem(sb); |
1201 | 1405 | ||
1202 | sb->s_frozen = SB_FREEZE_TRANS; | 1406 | /* Now wait for internal filesystem counter */ |
1407 | sb->s_writers.frozen = SB_FREEZE_FS; | ||
1203 | smp_wmb(); | 1408 | smp_wmb(); |
1409 | sb_wait_write(sb, SB_FREEZE_FS); | ||
1204 | 1410 | ||
1205 | sync_blockdev(sb->s_bdev); | ||
1206 | if (sb->s_op->freeze_fs) { | 1411 | if (sb->s_op->freeze_fs) { |
1207 | ret = sb->s_op->freeze_fs(sb); | 1412 | ret = sb->s_op->freeze_fs(sb); |
1208 | if (ret) { | 1413 | if (ret) { |
1209 | printk(KERN_ERR | 1414 | printk(KERN_ERR |
1210 | "VFS:Filesystem freeze failed\n"); | 1415 | "VFS:Filesystem freeze failed\n"); |
1211 | sb->s_frozen = SB_UNFROZEN; | 1416 | sb->s_writers.frozen = SB_UNFROZEN; |
1212 | smp_wmb(); | 1417 | smp_wmb(); |
1213 | wake_up(&sb->s_wait_unfrozen); | 1418 | wake_up(&sb->s_writers.wait_unfrozen); |
1214 | deactivate_locked_super(sb); | 1419 | deactivate_locked_super(sb); |
1215 | return ret; | 1420 | return ret; |
1216 | } | 1421 | } |
1217 | } | 1422 | } |
1423 | /* | ||
1424 | * This is just for debugging purposes so that fs can warn if it | ||
1425 | * sees write activity when frozen is set to SB_FREEZE_COMPLETE. | ||
1426 | */ | ||
1427 | sb->s_writers.frozen = SB_FREEZE_COMPLETE; | ||
1218 | up_write(&sb->s_umount); | 1428 | up_write(&sb->s_umount); |
1219 | return 0; | 1429 | return 0; |
1220 | } | 1430 | } |
@@ -1231,7 +1441,7 @@ int thaw_super(struct super_block *sb) | |||
1231 | int error; | 1441 | int error; |
1232 | 1442 | ||
1233 | down_write(&sb->s_umount); | 1443 | down_write(&sb->s_umount); |
1234 | if (sb->s_frozen == SB_UNFROZEN) { | 1444 | if (sb->s_writers.frozen == SB_UNFROZEN) { |
1235 | up_write(&sb->s_umount); | 1445 | up_write(&sb->s_umount); |
1236 | return -EINVAL; | 1446 | return -EINVAL; |
1237 | } | 1447 | } |
@@ -1244,16 +1454,15 @@ int thaw_super(struct super_block *sb) | |||
1244 | if (error) { | 1454 | if (error) { |
1245 | printk(KERN_ERR | 1455 | printk(KERN_ERR |
1246 | "VFS:Filesystem thaw failed\n"); | 1456 | "VFS:Filesystem thaw failed\n"); |
1247 | sb->s_frozen = SB_FREEZE_TRANS; | ||
1248 | up_write(&sb->s_umount); | 1457 | up_write(&sb->s_umount); |
1249 | return error; | 1458 | return error; |
1250 | } | 1459 | } |
1251 | } | 1460 | } |
1252 | 1461 | ||
1253 | out: | 1462 | out: |
1254 | sb->s_frozen = SB_UNFROZEN; | 1463 | sb->s_writers.frozen = SB_UNFROZEN; |
1255 | smp_wmb(); | 1464 | smp_wmb(); |
1256 | wake_up(&sb->s_wait_unfrozen); | 1465 | wake_up(&sb->s_writers.wait_unfrozen); |
1257 | deactivate_locked_super(sb); | 1466 | deactivate_locked_super(sb); |
1258 | 1467 | ||
1259 | return 0; | 1468 | return 0; |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 80c819cbe272..aefed9426b03 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -412,6 +412,7 @@ struct inodes_stat_t { | |||
412 | #include <linux/shrinker.h> | 412 | #include <linux/shrinker.h> |
413 | #include <linux/migrate_mode.h> | 413 | #include <linux/migrate_mode.h> |
414 | #include <linux/uidgid.h> | 414 | #include <linux/uidgid.h> |
415 | #include <linux/lockdep.h> | ||
415 | 416 | ||
416 | #include <asm/byteorder.h> | 417 | #include <asm/byteorder.h> |
417 | 418 | ||
@@ -1439,6 +1440,8 @@ extern void f_delown(struct file *filp); | |||
1439 | extern pid_t f_getown(struct file *filp); | 1440 | extern pid_t f_getown(struct file *filp); |
1440 | extern int send_sigurg(struct fown_struct *fown); | 1441 | extern int send_sigurg(struct fown_struct *fown); |
1441 | 1442 | ||
1443 | struct mm_struct; | ||
1444 | |||
1442 | /* | 1445 | /* |
1443 | * Umount options | 1446 | * Umount options |
1444 | */ | 1447 | */ |
@@ -1452,6 +1455,32 @@ extern int send_sigurg(struct fown_struct *fown); | |||
1452 | extern struct list_head super_blocks; | 1455 | extern struct list_head super_blocks; |
1453 | extern spinlock_t sb_lock; | 1456 | extern spinlock_t sb_lock; |
1454 | 1457 | ||
1458 | /* Possible states of 'frozen' field */ | ||
1459 | enum { | ||
1460 | SB_UNFROZEN = 0, /* FS is unfrozen */ | ||
1461 | SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ | ||
1462 | SB_FREEZE_TRANS = 2, | ||
1463 | SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ | ||
1464 | SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop | ||
1465 | * internal threads if needed) */ | ||
1466 | SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ | ||
1467 | }; | ||
1468 | |||
1469 | #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) | ||
1470 | |||
1471 | struct sb_writers { | ||
1472 | /* Counters for counting writers at each level */ | ||
1473 | struct percpu_counter counter[SB_FREEZE_LEVELS]; | ||
1474 | wait_queue_head_t wait; /* queue for waiting for | ||
1475 | writers / faults to finish */ | ||
1476 | int frozen; /* Is sb frozen? */ | ||
1477 | wait_queue_head_t wait_unfrozen; /* queue for waiting for | ||
1478 | sb to be thawed */ | ||
1479 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
1480 | struct lockdep_map lock_map[SB_FREEZE_LEVELS]; | ||
1481 | #endif | ||
1482 | }; | ||
1483 | |||
1455 | struct super_block { | 1484 | struct super_block { |
1456 | struct list_head s_list; /* Keep this first */ | 1485 | struct list_head s_list; /* Keep this first */ |
1457 | dev_t s_dev; /* search index; _not_ kdev_t */ | 1486 | dev_t s_dev; /* search index; _not_ kdev_t */ |
@@ -1501,6 +1530,7 @@ struct super_block { | |||
1501 | 1530 | ||
1502 | int s_frozen; | 1531 | int s_frozen; |
1503 | wait_queue_head_t s_wait_unfrozen; | 1532 | wait_queue_head_t s_wait_unfrozen; |
1533 | struct sb_writers s_writers; | ||
1504 | 1534 | ||
1505 | char s_id[32]; /* Informational name */ | 1535 | char s_id[32]; /* Informational name */ |
1506 | u8 s_uuid[16]; /* UUID */ | 1536 | u8 s_uuid[16]; /* UUID */ |
@@ -1555,14 +1585,119 @@ extern struct timespec current_fs_time(struct super_block *sb); | |||
1555 | /* | 1585 | /* |
1556 | * Snapshotting support. | 1586 | * Snapshotting support. |
1557 | */ | 1587 | */ |
1558 | enum { | 1588 | /* Will go away when all users are converted */ |
1559 | SB_UNFROZEN = 0, | 1589 | #define vfs_check_frozen(sb, level) do { } while (0) |
1560 | SB_FREEZE_WRITE = 1, | 1590 | |
1561 | SB_FREEZE_TRANS = 2, | 1591 | void __sb_end_write(struct super_block *sb, int level); |
1562 | }; | 1592 | int __sb_start_write(struct super_block *sb, int level, bool wait); |
1593 | |||
1594 | /** | ||
1595 | * sb_end_write - drop write access to a superblock | ||
1596 | * @sb: the super we wrote to | ||
1597 | * | ||
1598 | * Decrement number of writers to the filesystem. Wake up possible waiters | ||
1599 | * wanting to freeze the filesystem. | ||
1600 | */ | ||
1601 | static inline void sb_end_write(struct super_block *sb) | ||
1602 | { | ||
1603 | __sb_end_write(sb, SB_FREEZE_WRITE); | ||
1604 | } | ||
1605 | |||
1606 | /** | ||
1607 | * sb_end_pagefault - drop write access to a superblock from a page fault | ||
1608 | * @sb: the super we wrote to | ||
1609 | * | ||
1610 | * Decrement number of processes handling write page fault to the filesystem. | ||
1611 | * Wake up possible waiters wanting to freeze the filesystem. | ||
1612 | */ | ||
1613 | static inline void sb_end_pagefault(struct super_block *sb) | ||
1614 | { | ||
1615 | __sb_end_write(sb, SB_FREEZE_PAGEFAULT); | ||
1616 | } | ||
1617 | |||
1618 | /** | ||
1619 | * sb_end_intwrite - drop write access to a superblock for internal fs purposes | ||
1620 | * @sb: the super we wrote to | ||
1621 | * | ||
1622 | * Decrement fs-internal number of writers to the filesystem. Wake up possible | ||
1623 | * waiters wanting to freeze the filesystem. | ||
1624 | */ | ||
1625 | static inline void sb_end_intwrite(struct super_block *sb) | ||
1626 | { | ||
1627 | __sb_end_write(sb, SB_FREEZE_FS); | ||
1628 | } | ||
1629 | |||
1630 | /** | ||
1631 | * sb_start_write - get write access to a superblock | ||
1632 | * @sb: the super we write to | ||
1633 | * | ||
1634 | * When a process wants to write data or metadata to a file system (i.e. dirty | ||
1635 | * a page or an inode), it should embed the operation in a sb_start_write() - | ||
1636 | * sb_end_write() pair to get exclusion against file system freezing. This | ||
1637 | * function increments number of writers preventing freezing. If the file | ||
1638 | * system is already frozen, the function waits until the file system is | ||
1639 | * thawed. | ||
1640 | * | ||
1641 | * Since freeze protection behaves as a lock, users have to preserve | ||
1642 | * ordering of freeze protection and other filesystem locks. Generally, | ||
1643 | * freeze protection should be the outermost lock. In particular, we have: | ||
1644 | * | ||
1645 | * sb_start_write | ||
1646 | * -> i_mutex (write path, truncate, directory ops, ...) | ||
1647 | * -> s_umount (freeze_super, thaw_super) | ||
1648 | */ | ||
1649 | static inline void sb_start_write(struct super_block *sb) | ||
1650 | { | ||
1651 | __sb_start_write(sb, SB_FREEZE_WRITE, true); | ||
1652 | } | ||
1653 | |||
1654 | static inline int sb_start_write_trylock(struct super_block *sb) | ||
1655 | { | ||
1656 | return __sb_start_write(sb, SB_FREEZE_WRITE, false); | ||
1657 | } | ||
1658 | |||
1659 | /** | ||
1660 | * sb_start_pagefault - get write access to a superblock from a page fault | ||
1661 | * @sb: the super we write to | ||
1662 | * | ||
1663 | * When a process starts handling write page fault, it should embed the | ||
1664 | * operation into sb_start_pagefault() - sb_end_pagefault() pair to get | ||
1665 | * exclusion against file system freezing. This is needed since the page fault | ||
1666 | * is going to dirty a page. This function increments number of running page | ||
1667 | * faults preventing freezing. If the file system is already frozen, the | ||
1668 | * function waits until the file system is thawed. | ||
1669 | * | ||
1670 | * Since page fault freeze protection behaves as a lock, users have to preserve | ||
1671 | * ordering of freeze protection and other filesystem locks. It is advised to | ||
1672 | * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault | ||
1673 | * handling code implies lock dependency: | ||
1674 | * | ||
1675 | * mmap_sem | ||
1676 | * -> sb_start_pagefault | ||
1677 | */ | ||
1678 | static inline void sb_start_pagefault(struct super_block *sb) | ||
1679 | { | ||
1680 | __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); | ||
1681 | } | ||
1682 | |||
1683 | /** | ||
1684 | * sb_start_intwrite - get write access to a superblock for internal fs purposes | ||
1685 | * @sb: the super we write to | ||
1686 | * | ||
1687 | * This is the third level of protection against filesystem freezing. It is | ||
1688 | * free for use by a filesystem. The only requirement is that it must rank | ||
1689 | * below sb_start_pagefault. | ||
1690 | * | ||
1691 | * For example filesystem can call sb_start_intwrite() when starting a | ||
1692 | * transaction which somewhat eases handling of freezing for internal sources | ||
1693 | * of filesystem changes (internal fs threads, discarding preallocation on file | ||
1694 | * close, etc.). | ||
1695 | */ | ||
1696 | static inline void sb_start_intwrite(struct super_block *sb) | ||
1697 | { | ||
1698 | __sb_start_write(sb, SB_FREEZE_FS, true); | ||
1699 | } | ||
1563 | 1700 | ||
1564 | #define vfs_check_frozen(sb, level) \ | ||
1565 | wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) | ||
1566 | 1701 | ||
1567 | extern bool inode_owner_or_capable(const struct inode *inode); | 1702 | extern bool inode_owner_or_capable(const struct inode *inode); |
1568 | 1703 | ||
@@ -1886,6 +2021,7 @@ struct file_system_type { | |||
1886 | struct lock_class_key s_lock_key; | 2021 | struct lock_class_key s_lock_key; |
1887 | struct lock_class_key s_umount_key; | 2022 | struct lock_class_key s_umount_key; |
1888 | struct lock_class_key s_vfs_rename_key; | 2023 | struct lock_class_key s_vfs_rename_key; |
2024 | struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; | ||
1889 | 2025 | ||
1890 | struct lock_class_key i_lock_key; | 2026 | struct lock_class_key i_lock_key; |
1891 | struct lock_class_key i_mutex_key; | 2027 | struct lock_class_key i_mutex_key; |