aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/fs.h
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-08-01 13:26:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-08-01 13:26:23 -0400
commit a0e881b7c189fa2bd76c024dbff91e79511c971d (patch)
tree 0c801918565b08921d21aceee5b326f64d998f5f /include/linux/fs.h
parent eff0d13f3823f35d70228cd151d2a2c89288ff32 (diff)
parent dbc6e0222d79e78925fe20733844a796a4b72cf9 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull second vfs pile from Al Viro:
 "The stuff in there: fsfreeze deadlock fixes by Jan (essentially, the
  deadlock reproduced by xfstests 068), symlink and hardlink restriction
  patches, plus assorted cleanups and fixes.

  Note that another fsfreeze deadlock (emergency thaw one) is *not*
  dealt with - the series by Fernando conflicts a lot with Jan's, breaks
  userland ABI (FIFREEZE semantics gets changed) and trades the deadlock
  for massive vfsmount leak; this is going to be handled next cycle.
  There probably will be another pull request, but that stuff won't be
  in it."

Fix up trivial conflicts due to unrelated changes next to each other in
drivers/{staging/gdm72xx/usb_boot.c, usb/gadget/storage_common.c}

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (54 commits)
  delousing target_core_file a bit
  Documentation: Correct s_umount state for freeze_fs/unfreeze_fs
  fs: Remove old freezing mechanism
  ext2: Implement freezing
  btrfs: Convert to new freezing mechanism
  nilfs2: Convert to new freezing mechanism
  ntfs: Convert to new freezing mechanism
  fuse: Convert to new freezing mechanism
  gfs2: Convert to new freezing mechanism
  ocfs2: Convert to new freezing mechanism
  xfs: Convert to new freezing code
  ext4: Convert to new freezing mechanism
  fs: Protect write paths by sb_start_write - sb_end_write
  fs: Skip atime update on frozen filesystem
  fs: Add freezing handling to mnt_want_write() / mnt_drop_write()
  fs: Improve filesystem freezing handling
  switch the protection of percpu_counter list to spinlock
  nfsd: Push mnt_want_write() outside of i_mutex
  btrfs: Push mnt_want_write() outside of i_mutex
  fat: Push mnt_want_write() outside of i_mutex
  ...
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- include/linux/fs.h 154
1 files changed, 142 insertions, 12 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4ba5c8715523..38dba16c4176 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -414,6 +414,7 @@ struct inodes_stat_t {
414#include <linux/shrinker.h> 414#include <linux/shrinker.h>
415#include <linux/migrate_mode.h> 415#include <linux/migrate_mode.h>
416#include <linux/uidgid.h> 416#include <linux/uidgid.h>
417#include <linux/lockdep.h>
417 418
418#include <asm/byteorder.h> 419#include <asm/byteorder.h>
419 420
@@ -440,6 +441,8 @@ extern unsigned long get_max_files(void);
440extern int sysctl_nr_open; 441extern int sysctl_nr_open;
441extern struct inodes_stat_t inodes_stat; 442extern struct inodes_stat_t inodes_stat;
442extern int leases_enable, lease_break_time; 443extern int leases_enable, lease_break_time;
444extern int sysctl_protected_symlinks;
445extern int sysctl_protected_hardlinks;
443 446
444struct buffer_head; 447struct buffer_head;
445typedef int (get_block_t)(struct inode *inode, sector_t iblock, 448typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -1445,6 +1448,8 @@ extern void f_delown(struct file *filp);
1445extern pid_t f_getown(struct file *filp); 1448extern pid_t f_getown(struct file *filp);
1446extern int send_sigurg(struct fown_struct *fown); 1449extern int send_sigurg(struct fown_struct *fown);
1447 1450
1451struct mm_struct;
1452
1448/* 1453/*
1449 * Umount options 1454 * Umount options
1450 */ 1455 */
@@ -1458,6 +1463,31 @@ extern int send_sigurg(struct fown_struct *fown);
1458extern struct list_head super_blocks; 1463extern struct list_head super_blocks;
1459extern spinlock_t sb_lock; 1464extern spinlock_t sb_lock;
1460 1465
1466/* Possible states of the 'frozen' field of struct sb_writers */
1467enum {
1468 SB_UNFROZEN = 0, /* FS is unfrozen */
1469 SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */
1470 SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */
1471 SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop
1472 * internal threads if needed) */
1473 SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */
1474};
1475
1476#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) /* levels below COMPLETE: WRITE, PAGEFAULT, FS */
1477
1478struct sb_writers {
1479 /* One writer counter per freeze level (SB_FREEZE_LEVELS entries) */
1480 struct percpu_counter counter[SB_FREEZE_LEVELS];
1481 wait_queue_head_t wait; /* queue for waiting for
1482 writers / faults to finish */
1483 int frozen; /* Freeze state: SB_UNFROZEN .. SB_FREEZE_COMPLETE */
1484 wait_queue_head_t wait_unfrozen; /* queue for waiting for
1485 sb to be thawed */
1486#ifdef CONFIG_DEBUG_LOCK_ALLOC
1487 struct lockdep_map lock_map[SB_FREEZE_LEVELS]; /* one lockdep map per freeze level */
1488#endif
1489};
1490
1461struct super_block { 1491struct super_block {
1462 struct list_head s_list; /* Keep this first */ 1492 struct list_head s_list; /* Keep this first */
1463 dev_t s_dev; /* search index; _not_ kdev_t */ 1493 dev_t s_dev; /* search index; _not_ kdev_t */
@@ -1505,8 +1535,7 @@ struct super_block {
1505 struct hlist_node s_instances; 1535 struct hlist_node s_instances;
1506 struct quota_info s_dquot; /* Diskquota specific options */ 1536 struct quota_info s_dquot; /* Diskquota specific options */
1507 1537
1508 int s_frozen; 1538 struct sb_writers s_writers;
1509 wait_queue_head_t s_wait_unfrozen;
1510 1539
1511 char s_id[32]; /* Informational name */ 1540 char s_id[32]; /* Informational name */
1512 u8 s_uuid[16]; /* UUID */ 1541 u8 s_uuid[16]; /* UUID */
@@ -1561,14 +1590,117 @@ extern struct timespec current_fs_time(struct super_block *sb);
1561/* 1590/*
1562 * Snapshotting support. 1591 * Snapshotting support.
1563 */ 1592 */
1564enum {
1565 SB_UNFROZEN = 0,
1566 SB_FREEZE_WRITE = 1,
1567 SB_FREEZE_TRANS = 2,
1568};
1569 1593
1570#define vfs_check_frozen(sb, level) \ 1594void __sb_end_write(struct super_block *sb, int level);
1571 wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) 1595int __sb_start_write(struct super_block *sb, int level, bool wait);
1596
1597/**
1598 * sb_end_write - drop write access to a superblock
1599 * @sb: the super we wrote to
1600 *
1601 * Decrement the number of writers to the filesystem at the SB_FREEZE_WRITE
1602 * level. Wake up possible waiters wanting to freeze the filesystem.
1603 */
1604static inline void sb_end_write(struct super_block *sb)
1605{
1606 __sb_end_write(sb, SB_FREEZE_WRITE); /* counterpart of sb_start_write() */
1607}
1608
1609/**
1610 * sb_end_pagefault - drop write access to a superblock from a page fault
1611 * @sb: the super we wrote to
1612 *
1613 * Decrement the number of processes handling a write page fault on the
1614 * filesystem. Wake up possible waiters wanting to freeze the filesystem.
1615 */
1616static inline void sb_end_pagefault(struct super_block *sb)
1617{
1618 __sb_end_write(sb, SB_FREEZE_PAGEFAULT); /* counterpart of sb_start_pagefault() */
1619}
1620
1621/**
1622 * sb_end_intwrite - drop write access to a superblock for internal fs purposes
1623 * @sb: the super we wrote to
1624 *
1625 * Decrement the fs-internal number of writers to the filesystem (the
1626 * SB_FREEZE_FS level). Wake up possible waiters wanting to freeze it.
1627 */
1628static inline void sb_end_intwrite(struct super_block *sb)
1629{
1630 __sb_end_write(sb, SB_FREEZE_FS); /* counterpart of sb_start_intwrite() */
1631}
1632
1633/**
1634 * sb_start_write - get write access to a superblock
1635 * @sb: the super we write to
1636 *
1637 * When a process wants to write data or metadata to a file system (i.e. dirty
1638 * a page or an inode), it should embed the operation in a sb_start_write() -
1639 * sb_end_write() pair to get exclusion against file system freezing. This
1640 * function increments the number of writers, preventing freezing. If the file
1641 * system is already frozen, the function waits until the file system is
1642 * thawed.
1643 *
1644 * Since freeze protection behaves as a lock, users have to preserve the
1645 * ordering of freeze protection and other filesystem locks. Generally,
1646 * freeze protection should be the outermost lock. In particular, we have:
1647 *
1648 * sb_start_write
1649 * -> i_mutex (write path, truncate, directory ops, ...)
1650 * -> s_umount (freeze_super, thaw_super)
1651 */
1652static inline void sb_start_write(struct super_block *sb)
1653{
1654 __sb_start_write(sb, SB_FREEZE_WRITE, true); /* wait == true; return value unused */
1655}
1656
1657static inline int sb_start_write_trylock(struct super_block *sb) /* sb_start_write() variant passing wait == false */
1658{
1659 return __sb_start_write(sb, SB_FREEZE_WRITE, false); /* result of __sb_start_write() is returned unchanged */
1660}
1661
1662/**
1663 * sb_start_pagefault - get write access to a superblock from a page fault
1664 * @sb: the super we write to
1665 *
1666 * When a process starts handling a write page fault, it should embed the
1667 * operation into a sb_start_pagefault() - sb_end_pagefault() pair to get
1668 * exclusion against file system freezing. This is needed since the page fault
1669 * is going to dirty a page. This function increments the number of running
1670 * page faults, preventing freezing. If the file system is already frozen, the
1671 * function waits until the file system is thawed.
1672 *
1673 * Since page fault freeze protection behaves as a lock, users have to preserve
1674 * ordering of freeze protection and other filesystem locks. It is advised to
1675 * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
1676 * handling code implies lock dependency:
1677 *
1678 * mmap_sem
1679 * -> sb_start_pagefault
1680 */
1681static inline void sb_start_pagefault(struct super_block *sb)
1682{
1683 __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); /* wait == true; return value unused */
1684}
1685
1686/**
1687 * sb_start_intwrite - get write access to a superblock for internal fs purposes
1688 * @sb: the super we write to
1689 *
1690 * This is the third level of protection against filesystem freezing. It is
1691 * free for use by a filesystem. The only requirement is that it must rank
1692 * below sb_start_pagefault.
1693 *
1694 * For example, a filesystem can call sb_start_intwrite() when starting a
1695 * transaction, which somewhat eases handling of freezing for internal sources
1696 * of filesystem changes (internal fs threads, discarding preallocation on file
1697 * close, etc.).
1698 */
1699static inline void sb_start_intwrite(struct super_block *sb)
1700{
1701 __sb_start_write(sb, SB_FREEZE_FS, true); /* wait == true; return value unused */
1702}
1703
1572 1704
1573extern bool inode_owner_or_capable(const struct inode *inode); 1705extern bool inode_owner_or_capable(const struct inode *inode);
1574 1706
@@ -1892,6 +2024,7 @@ struct file_system_type {
1892 struct lock_class_key s_lock_key; 2024 struct lock_class_key s_lock_key;
1893 struct lock_class_key s_umount_key; 2025 struct lock_class_key s_umount_key;
1894 struct lock_class_key s_vfs_rename_key; 2026 struct lock_class_key s_vfs_rename_key;
2027 struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
1895 2028
1896 struct lock_class_key i_lock_key; 2029 struct lock_class_key i_lock_key;
1897 struct lock_class_key i_mutex_key; 2030 struct lock_class_key i_mutex_key;
@@ -2334,9 +2467,6 @@ static inline void i_readcount_inc(struct inode *inode)
2334} 2467}
2335#endif 2468#endif
2336extern int do_pipe_flags(int *, int); 2469extern int do_pipe_flags(int *, int);
2337extern struct file *create_read_pipe(struct file *f, int flags);
2338extern struct file *create_write_pipe(int flags);
2339extern void free_write_pipe(struct file *);
2340 2470
2341extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2471extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2342extern struct file * open_exec(const char *); 2472extern struct file * open_exec(const char *);