aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/super.c251
-rw-r--r--include/linux/fs.h150
2 files changed, 373 insertions, 28 deletions
diff --git a/fs/super.c b/fs/super.c
index c743fb3be4b8..0f64ecb7b1bf 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,12 +33,19 @@
33#include <linux/rculist_bl.h> 33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h> 34#include <linux/cleancache.h>
35#include <linux/fsnotify.h> 35#include <linux/fsnotify.h>
36#include <linux/lockdep.h>
36#include "internal.h" 37#include "internal.h"
37 38
38 39
39LIST_HEAD(super_blocks); 40LIST_HEAD(super_blocks);
40DEFINE_SPINLOCK(sb_lock); 41DEFINE_SPINLOCK(sb_lock);
41 42
43static char *sb_writers_name[SB_FREEZE_LEVELS] = {
44 "sb_writers",
45 "sb_pagefaults",
46 "sb_internal",
47};
48
42/* 49/*
43 * One thing we have to be careful of with a per-sb shrinker is that we don't 50 * One thing we have to be careful of with a per-sb shrinker is that we don't
44 * drop the last active reference to the superblock from within the shrinker. 51 * drop the last active reference to the superblock from within the shrinker.
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
102 return total_objects; 109 return total_objects;
103} 110}
104 111
112static int init_sb_writers(struct super_block *s, struct file_system_type *type)
113{
114 int err;
115 int i;
116
117 for (i = 0; i < SB_FREEZE_LEVELS; i++) {
118 err = percpu_counter_init(&s->s_writers.counter[i], 0);
119 if (err < 0)
120 goto err_out;
121 lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
122 &type->s_writers_key[i], 0);
123 }
124 init_waitqueue_head(&s->s_writers.wait);
125 init_waitqueue_head(&s->s_writers.wait_unfrozen);
126 return 0;
127err_out:
128 while (--i >= 0)
129 percpu_counter_destroy(&s->s_writers.counter[i]);
130 return err;
131}
132
133static void destroy_sb_writers(struct super_block *s)
134{
135 int i;
136
137 for (i = 0; i < SB_FREEZE_LEVELS; i++)
138 percpu_counter_destroy(&s->s_writers.counter[i]);
139}
140
105/** 141/**
106 * alloc_super - create new superblock 142 * alloc_super - create new superblock
107 * @type: filesystem type superblock should belong to 143 * @type: filesystem type superblock should belong to
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
117 153
118 if (s) { 154 if (s) {
119 if (security_sb_alloc(s)) { 155 if (security_sb_alloc(s)) {
156 /*
157 * We cannot call security_sb_free() without
158 * security_sb_alloc() succeeding. So bail out manually
159 */
120 kfree(s); 160 kfree(s);
121 s = NULL; 161 s = NULL;
122 goto out; 162 goto out;
123 } 163 }
124#ifdef CONFIG_SMP 164#ifdef CONFIG_SMP
125 s->s_files = alloc_percpu(struct list_head); 165 s->s_files = alloc_percpu(struct list_head);
126 if (!s->s_files) { 166 if (!s->s_files)
127 security_sb_free(s); 167 goto err_out;
128 kfree(s); 168 else {
129 s = NULL;
130 goto out;
131 } else {
132 int i; 169 int i;
133 170
134 for_each_possible_cpu(i) 171 for_each_possible_cpu(i)
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
137#else 174#else
138 INIT_LIST_HEAD(&s->s_files); 175 INIT_LIST_HEAD(&s->s_files);
139#endif 176#endif
177 if (init_sb_writers(s, type))
178 goto err_out;
140 s->s_flags = flags; 179 s->s_flags = flags;
141 s->s_bdi = &default_backing_dev_info; 180 s->s_bdi = &default_backing_dev_info;
142 INIT_HLIST_NODE(&s->s_instances); 181 INIT_HLIST_NODE(&s->s_instances);
@@ -190,6 +229,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
190 } 229 }
191out: 230out:
192 return s; 231 return s;
232err_out:
233 security_sb_free(s);
234#ifdef CONFIG_SMP
235 if (s->s_files)
236 free_percpu(s->s_files);
237#endif
238 destroy_sb_writers(s);
239 kfree(s);
240 s = NULL;
241 goto out;
193} 242}
194 243
195/** 244/**
@@ -203,6 +252,7 @@ static inline void destroy_super(struct super_block *s)
203#ifdef CONFIG_SMP 252#ifdef CONFIG_SMP
204 free_percpu(s->s_files); 253 free_percpu(s->s_files);
205#endif 254#endif
255 destroy_sb_writers(s);
206 security_sb_free(s); 256 security_sb_free(s);
207 WARN_ON(!list_empty(&s->s_mounts)); 257 WARN_ON(!list_empty(&s->s_mounts));
208 kfree(s->s_subtype); 258 kfree(s->s_subtype);
@@ -651,10 +701,11 @@ struct super_block *get_super_thawed(struct block_device *bdev)
651{ 701{
652 while (1) { 702 while (1) {
653 struct super_block *s = get_super(bdev); 703 struct super_block *s = get_super(bdev);
654 if (!s || s->s_frozen == SB_UNFROZEN) 704 if (!s || s->s_writers.frozen == SB_UNFROZEN)
655 return s; 705 return s;
656 up_read(&s->s_umount); 706 up_read(&s->s_umount);
657 vfs_check_frozen(s, SB_FREEZE_WRITE); 707 wait_event(s->s_writers.wait_unfrozen,
708 s->s_writers.frozen == SB_UNFROZEN);
658 put_super(s); 709 put_super(s);
659 } 710 }
660} 711}
@@ -732,7 +783,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
732 int retval; 783 int retval;
733 int remount_ro; 784 int remount_ro;
734 785
735 if (sb->s_frozen != SB_UNFROZEN) 786 if (sb->s_writers.frozen != SB_UNFROZEN)
736 return -EBUSY; 787 return -EBUSY;
737 788
738#ifdef CONFIG_BLOCK 789#ifdef CONFIG_BLOCK
@@ -1163,6 +1214,120 @@ out:
1163 return ERR_PTR(error); 1214 return ERR_PTR(error);
1164} 1215}
1165 1216
1217/*
1218 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
1219 * instead.
1220 */
1221void __sb_end_write(struct super_block *sb, int level)
1222{
1223 percpu_counter_dec(&sb->s_writers.counter[level-1]);
1224 /*
1225 * Make sure s_writers are updated before we wake up waiters in
1226 * freeze_super().
1227 */
1228 smp_mb();
1229 if (waitqueue_active(&sb->s_writers.wait))
1230 wake_up(&sb->s_writers.wait);
1231 rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
1232}
1233EXPORT_SYMBOL(__sb_end_write);
1234
1235#ifdef CONFIG_LOCKDEP
1236/*
1237 * We want lockdep to tell us about possible deadlocks with freezing but
1238 * it's a bit tricky to properly instrument it. Getting a freeze protection
1239 * works as getting a read lock but there are subtle problems. XFS for example
1240 * gets freeze protection on internal level twice in some cases, which is OK
1241 * only because we already hold a freeze protection also on higher level. Due
1242 * to these cases we have to tell lockdep we are doing trylock when we
1243 * already hold a freeze protection for a higher freeze level.
1244 */
1245static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
1246 unsigned long ip)
1247{
1248 int i;
1249
1250 if (!trylock) {
1251 for (i = 0; i < level - 1; i++)
1252 if (lock_is_held(&sb->s_writers.lock_map[i])) {
1253 trylock = true;
1254 break;
1255 }
1256 }
1257 rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
1258}
1259#endif
1260
1261/*
1262 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
1263 * instead.
1264 */
1265int __sb_start_write(struct super_block *sb, int level, bool wait)
1266{
1267retry:
1268 if (unlikely(sb->s_writers.frozen >= level)) {
1269 if (!wait)
1270 return 0;
1271 wait_event(sb->s_writers.wait_unfrozen,
1272 sb->s_writers.frozen < level);
1273 }
1274
1275#ifdef CONFIG_LOCKDEP
1276 acquire_freeze_lock(sb, level, !wait, _RET_IP_);
1277#endif
1278 percpu_counter_inc(&sb->s_writers.counter[level-1]);
1279 /*
1280 * Make sure counter is updated before we check for frozen.
1281 * freeze_super() first sets frozen and then checks the counter.
1282 */
1283 smp_mb();
1284 if (unlikely(sb->s_writers.frozen >= level)) {
1285 __sb_end_write(sb, level);
1286 goto retry;
1287 }
1288 return 1;
1289}
1290EXPORT_SYMBOL(__sb_start_write);
1291
1292/**
1293 * sb_wait_write - wait until all writers to given file system finish
1294 * @sb: the super for which we wait
1295 * @level: type of writers we wait for (normal, page fault, or fs-internal)
1296 *
1297 * This function waits until there are no writers of given type to given file
1298 * system. Caller of this function should make sure there can be no new writers
1299 * of type @level before calling this function. Otherwise this function can
1300 * livelock.
1301 */
1302static void sb_wait_write(struct super_block *sb, int level)
1303{
1304 s64 writers;
1305
1306 /*
1307 * We just cycle-through lockdep here so that it does not complain
1308 * about returning with lock to userspace
1309 */
1310 rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
1311 rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
1312
1313 do {
1314 DEFINE_WAIT(wait);
1315
1316 /*
1317 * We use a barrier in prepare_to_wait() to separate setting
1318 * of frozen and checking of the counter
1319 */
1320 prepare_to_wait(&sb->s_writers.wait, &wait,
1321 TASK_UNINTERRUPTIBLE);
1322
1323 writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
1324 if (writers)
1325 schedule();
1326
1327 finish_wait(&sb->s_writers.wait, &wait);
1328 } while (writers);
1329}
1330
1166/** 1331/**
1167 * freeze_super - lock the filesystem and force it into a consistent state 1332 * freeze_super - lock the filesystem and force it into a consistent state
1168 * @sb: the super to lock 1333 * @sb: the super to lock
@@ -1170,6 +1335,31 @@ out:
1170 * Syncs the super to make sure the filesystem is consistent and calls the fs's 1335 * Syncs the super to make sure the filesystem is consistent and calls the fs's
1171 * freeze_fs. Subsequent calls to this without first thawing the fs will return 1336 * freeze_fs. Subsequent calls to this without first thawing the fs will return
1172 * -EBUSY. 1337 * -EBUSY.
1338 *
1339 * During this function, sb->s_writers.frozen goes through these values:
1340 *
1341 * SB_UNFROZEN: File system is normal, all writes progress as usual.
1342 *
1343 * SB_FREEZE_WRITE: The file system is in the process of being frozen. New
1344 * writes should be blocked, though page faults are still allowed. We wait for
1345 * all writes to complete and then proceed to the next stage.
1346 *
1347 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
1348 * but internal fs threads can still modify the filesystem (although they
1349 * should not dirty new pages or inodes), writeback can run etc. After waiting
1350 * for all running page faults we sync the filesystem which will clean all
1351 * dirty pages and inodes (no new dirty pages or inodes can be created when
1352 * sync is running).
1353 *
1354 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
1355 * modification are blocked (e.g. XFS preallocation truncation on inode
1356 * reclaim). This is usually implemented by blocking new transactions for
1357 * filesystems that have them and need this additional guard. After all
1358 * internal writers are finished we call ->freeze_fs() to finish filesystem
1359 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
1360 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
1361 *
1362 * sb->s_writers.frozen is protected by sb->s_umount.
1173 */ 1363 */
1174int freeze_super(struct super_block *sb) 1364int freeze_super(struct super_block *sb)
1175{ 1365{
@@ -1177,7 +1367,7 @@ int freeze_super(struct super_block *sb)
1177 1367
1178 atomic_inc(&sb->s_active); 1368 atomic_inc(&sb->s_active);
1179 down_write(&sb->s_umount); 1369 down_write(&sb->s_umount);
1180 if (sb->s_frozen) { 1370 if (sb->s_writers.frozen != SB_UNFROZEN) {
1181 deactivate_locked_super(sb); 1371 deactivate_locked_super(sb);
1182 return -EBUSY; 1372 return -EBUSY;
1183 } 1373 }
@@ -1188,33 +1378,53 @@ int freeze_super(struct super_block *sb)
1188 } 1378 }
1189 1379
1190 if (sb->s_flags & MS_RDONLY) { 1380 if (sb->s_flags & MS_RDONLY) {
1191 sb->s_frozen = SB_FREEZE_TRANS; 1381 /* Nothing to do really... */
1192 smp_wmb(); 1382 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1193 up_write(&sb->s_umount); 1383 up_write(&sb->s_umount);
1194 return 0; 1384 return 0;
1195 } 1385 }
1196 1386
1197 sb->s_frozen = SB_FREEZE_WRITE; 1387 /* From now on, no new normal writers can start */
1388 sb->s_writers.frozen = SB_FREEZE_WRITE;
1389 smp_wmb();
1390
1391 /* Release s_umount to preserve sb_start_write -> s_umount ordering */
1392 up_write(&sb->s_umount);
1393
1394 sb_wait_write(sb, SB_FREEZE_WRITE);
1395
1396 /* Now we go and block page faults... */
1397 down_write(&sb->s_umount);
1398 sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
1198 smp_wmb(); 1399 smp_wmb();
1199 1400
1401 sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
1402
1403 /* All writers are done so after syncing there won't be dirty data */
1200 sync_filesystem(sb); 1404 sync_filesystem(sb);
1201 1405
1202 sb->s_frozen = SB_FREEZE_TRANS; 1406 /* Now wait for internal filesystem counter */
1407 sb->s_writers.frozen = SB_FREEZE_FS;
1203 smp_wmb(); 1408 smp_wmb();
1409 sb_wait_write(sb, SB_FREEZE_FS);
1204 1410
1205 sync_blockdev(sb->s_bdev);
1206 if (sb->s_op->freeze_fs) { 1411 if (sb->s_op->freeze_fs) {
1207 ret = sb->s_op->freeze_fs(sb); 1412 ret = sb->s_op->freeze_fs(sb);
1208 if (ret) { 1413 if (ret) {
1209 printk(KERN_ERR 1414 printk(KERN_ERR
1210 "VFS:Filesystem freeze failed\n"); 1415 "VFS:Filesystem freeze failed\n");
1211 sb->s_frozen = SB_UNFROZEN; 1416 sb->s_writers.frozen = SB_UNFROZEN;
1212 smp_wmb(); 1417 smp_wmb();
1213 wake_up(&sb->s_wait_unfrozen); 1418 wake_up(&sb->s_writers.wait_unfrozen);
1214 deactivate_locked_super(sb); 1419 deactivate_locked_super(sb);
1215 return ret; 1420 return ret;
1216 } 1421 }
1217 } 1422 }
1423 /*
1424 * This is just for debugging purposes so that fs can warn if it
1425 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
1426 */
1427 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1218 up_write(&sb->s_umount); 1428 up_write(&sb->s_umount);
1219 return 0; 1429 return 0;
1220} 1430}
@@ -1231,7 +1441,7 @@ int thaw_super(struct super_block *sb)
1231 int error; 1441 int error;
1232 1442
1233 down_write(&sb->s_umount); 1443 down_write(&sb->s_umount);
1234 if (sb->s_frozen == SB_UNFROZEN) { 1444 if (sb->s_writers.frozen == SB_UNFROZEN) {
1235 up_write(&sb->s_umount); 1445 up_write(&sb->s_umount);
1236 return -EINVAL; 1446 return -EINVAL;
1237 } 1447 }
@@ -1244,16 +1454,15 @@ int thaw_super(struct super_block *sb)
1244 if (error) { 1454 if (error) {
1245 printk(KERN_ERR 1455 printk(KERN_ERR
1246 "VFS:Filesystem thaw failed\n"); 1456 "VFS:Filesystem thaw failed\n");
1247 sb->s_frozen = SB_FREEZE_TRANS;
1248 up_write(&sb->s_umount); 1457 up_write(&sb->s_umount);
1249 return error; 1458 return error;
1250 } 1459 }
1251 } 1460 }
1252 1461
1253out: 1462out:
1254 sb->s_frozen = SB_UNFROZEN; 1463 sb->s_writers.frozen = SB_UNFROZEN;
1255 smp_wmb(); 1464 smp_wmb();
1256 wake_up(&sb->s_wait_unfrozen); 1465 wake_up(&sb->s_writers.wait_unfrozen);
1257 deactivate_locked_super(sb); 1466 deactivate_locked_super(sb);
1258 1467
1259 return 0; 1468 return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 80c819cbe272..aefed9426b03 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -412,6 +412,7 @@ struct inodes_stat_t {
412#include <linux/shrinker.h> 412#include <linux/shrinker.h>
413#include <linux/migrate_mode.h> 413#include <linux/migrate_mode.h>
414#include <linux/uidgid.h> 414#include <linux/uidgid.h>
415#include <linux/lockdep.h>
415 416
416#include <asm/byteorder.h> 417#include <asm/byteorder.h>
417 418
@@ -1439,6 +1440,8 @@ extern void f_delown(struct file *filp);
1439extern pid_t f_getown(struct file *filp); 1440extern pid_t f_getown(struct file *filp);
1440extern int send_sigurg(struct fown_struct *fown); 1441extern int send_sigurg(struct fown_struct *fown);
1441 1442
1443struct mm_struct;
1444
1442/* 1445/*
1443 * Umount options 1446 * Umount options
1444 */ 1447 */
@@ -1452,6 +1455,32 @@ extern int send_sigurg(struct fown_struct *fown);
1452extern struct list_head super_blocks; 1455extern struct list_head super_blocks;
1453extern spinlock_t sb_lock; 1456extern spinlock_t sb_lock;
1454 1457
1458/* Possible states of 'frozen' field */
1459enum {
1460 SB_UNFROZEN = 0, /* FS is unfrozen */
1461 SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */
1462 SB_FREEZE_TRANS = 2,
1463 SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */
1464 SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop
1465 * internal threads if needed) */
1466 SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */
1467};
1468
1469#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
1470
1471struct sb_writers {
1472 /* Counters for counting writers at each level */
1473 struct percpu_counter counter[SB_FREEZE_LEVELS];
1474 wait_queue_head_t wait; /* queue for waiting for
1475 writers / faults to finish */
1476 int frozen; /* Is sb frozen? */
1477 wait_queue_head_t wait_unfrozen; /* queue for waiting for
1478 sb to be thawed */
1479#ifdef CONFIG_DEBUG_LOCK_ALLOC
1480 struct lockdep_map lock_map[SB_FREEZE_LEVELS];
1481#endif
1482};
1483
1455struct super_block { 1484struct super_block {
1456 struct list_head s_list; /* Keep this first */ 1485 struct list_head s_list; /* Keep this first */
1457 dev_t s_dev; /* search index; _not_ kdev_t */ 1486 dev_t s_dev; /* search index; _not_ kdev_t */
@@ -1501,6 +1530,7 @@ struct super_block {
1501 1530
1502 int s_frozen; 1531 int s_frozen;
1503 wait_queue_head_t s_wait_unfrozen; 1532 wait_queue_head_t s_wait_unfrozen;
1533 struct sb_writers s_writers;
1504 1534
1505 char s_id[32]; /* Informational name */ 1535 char s_id[32]; /* Informational name */
1506 u8 s_uuid[16]; /* UUID */ 1536 u8 s_uuid[16]; /* UUID */
@@ -1555,14 +1585,119 @@ extern struct timespec current_fs_time(struct super_block *sb);
1555/* 1585/*
1556 * Snapshotting support. 1586 * Snapshotting support.
1557 */ 1587 */
1558enum { 1588/* Will go away when all users are converted */
1559 SB_UNFROZEN = 0, 1589#define vfs_check_frozen(sb, level) do { } while (0)
1560 SB_FREEZE_WRITE = 1, 1590
1561 SB_FREEZE_TRANS = 2, 1591void __sb_end_write(struct super_block *sb, int level);
1562}; 1592int __sb_start_write(struct super_block *sb, int level, bool wait);
1593
1594/**
1595 * sb_end_write - drop write access to a superblock
1596 * @sb: the super we wrote to
1597 *
1598 * Decrement number of writers to the filesystem. Wake up possible waiters
1599 * wanting to freeze the filesystem.
1600 */
1601static inline void sb_end_write(struct super_block *sb)
1602{
1603 __sb_end_write(sb, SB_FREEZE_WRITE);
1604}
1605
1606/**
1607 * sb_end_pagefault - drop write access to a superblock from a page fault
1608 * @sb: the super we wrote to
1609 *
1610 * Decrement number of processes handling write page fault to the filesystem.
1611 * Wake up possible waiters wanting to freeze the filesystem.
1612 */
1613static inline void sb_end_pagefault(struct super_block *sb)
1614{
1615 __sb_end_write(sb, SB_FREEZE_PAGEFAULT);
1616}
1617
1618/**
1619 * sb_end_intwrite - drop write access to a superblock for internal fs purposes
1620 * @sb: the super we wrote to
1621 *
1622 * Decrement fs-internal number of writers to the filesystem. Wake up possible
1623 * waiters wanting to freeze the filesystem.
1624 */
1625static inline void sb_end_intwrite(struct super_block *sb)
1626{
1627 __sb_end_write(sb, SB_FREEZE_FS);
1628}
1629
1630/**
1631 * sb_start_write - get write access to a superblock
1632 * @sb: the super we write to
1633 *
1634 * When a process wants to write data or metadata to a file system (i.e. dirty
1635 * a page or an inode), it should embed the operation in a sb_start_write() -
1636 * sb_end_write() pair to get exclusion against file system freezing. This
1637 * function increments number of writers preventing freezing. If the file
1638 * system is already frozen, the function waits until the file system is
1639 * thawed.
1640 *
1641 * Since freeze protection behaves as a lock, users have to preserve
1642 * ordering of freeze protection and other filesystem locks. Generally,
1643 * freeze protection should be the outermost lock. In particular, we have:
1644 *
1645 * sb_start_write
1646 * -> i_mutex (write path, truncate, directory ops, ...)
1647 * -> s_umount (freeze_super, thaw_super)
1648 */
1649static inline void sb_start_write(struct super_block *sb)
1650{
1651 __sb_start_write(sb, SB_FREEZE_WRITE, true);
1652}
1653
1654static inline int sb_start_write_trylock(struct super_block *sb)
1655{
1656 return __sb_start_write(sb, SB_FREEZE_WRITE, false);
1657}
1658
1659/**
1660 * sb_start_pagefault - get write access to a superblock from a page fault
1661 * @sb: the super we write to
1662 *
1663 * When a process starts handling write page fault, it should embed the
1664 * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
1665 * exclusion against file system freezing. This is needed since the page fault
1666 * is going to dirty a page. This function increments number of running page
1667 * faults preventing freezing. If the file system is already frozen, the
1668 * function waits until the file system is thawed.
1669 *
1670 * Since page fault freeze protection behaves as a lock, users have to preserve
1671 * ordering of freeze protection and other filesystem locks. It is advised to
1672 * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
1673 * handling code implies lock dependency:
1674 *
1675 * mmap_sem
1676 * -> sb_start_pagefault
1677 */
1678static inline void sb_start_pagefault(struct super_block *sb)
1679{
1680 __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
1681}
1682
1683/**
1684 * sb_start_intwrite - get write access to a superblock for internal fs purposes
1685 * @sb: the super we write to
1686 *
1687 * This is the third level of protection against filesystem freezing. It is
1688 * free for use by a filesystem. The only requirement is that it must rank
1689 * below sb_start_pagefault.
1690 *
1691 * For example filesystem can call sb_start_intwrite() when starting a
1692 * transaction which somewhat eases handling of freezing for internal sources
1693 * of filesystem changes (internal fs threads, discarding preallocation on file
1694 * close, etc.).
1695 */
1696static inline void sb_start_intwrite(struct super_block *sb)
1697{
1698 __sb_start_write(sb, SB_FREEZE_FS, true);
1699}
1563 1700
1564#define vfs_check_frozen(sb, level) \
1565 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
1566 1701
1567extern bool inode_owner_or_capable(const struct inode *inode); 1702extern bool inode_owner_or_capable(const struct inode *inode);
1568 1703
@@ -1886,6 +2021,7 @@ struct file_system_type {
1886 struct lock_class_key s_lock_key; 2021 struct lock_class_key s_lock_key;
1887 struct lock_class_key s_umount_key; 2022 struct lock_class_key s_umount_key;
1888 struct lock_class_key s_vfs_rename_key; 2023 struct lock_class_key s_vfs_rename_key;
2024 struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
1889 2025
1890 struct lock_class_key i_lock_key; 2026 struct lock_class_key i_lock_key;
1891 struct lock_class_key i_mutex_key; 2027 struct lock_class_key i_mutex_key;