aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-12-17 11:31:01 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-12-17 11:31:01 -0500
commit b8a7f3cd7e8212e5c572178ff3b5a514861036a5 (patch)
tree f88fd0fc83a466a6d5781f90e7ed76cb2fa0f5d7
parent a377d181e65241344dd95aa4a42c477477be03f1 (diff)
parent eaff8079d4f1016a12e34ab323737314f24127dd (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: kill I_LOCK; fold do_sync_file_range into sys_sync_file_range; fix up O_SYNC comments; VFS/fsstack: handle 32-bit smp + preempt + large files in fsstack_copy_inode_size; fsstack/ecryptfs: remove unused get_nlinks param to fsstack_copy_attr_all; vfs: remove extraneous NULL d_inode check from do_filp_open; fs: no games with DCACHE_UNHASHED; fs: anon_inodes implement dname; dio: fix use-after-free
-rw-r--r--arch/alpha/include/asm/fcntl.h2
-rw-r--r--arch/mips/include/asm/fcntl.h2
-rw-r--r--arch/sparc/include/asm/fcntl.h2
-rw-r--r--fs/anon_inodes.c17
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/inode.c6
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/gfs2/inode.c2
-rw-r--r--fs/inode.c26
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/pipe.c18
-rw-r--r--fs/stack.c71
-rw-r--r--fs/sync.c59
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_iget.c4
-rw-r--r--include/asm-generic/fcntl.h2
-rw-r--r--include/linux/fs.h40
-rw-r--r--include/linux/fs_stack.h6
-rw-r--r--include/linux/writeback.h3
-rw-r--r--net/socket.c19
24 files changed, 136 insertions, 163 deletions
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h
index 21b1117a0c61..70145cbb21cb 100644
--- a/arch/alpha/include/asm/fcntl.h
+++ b/arch/alpha/include/asm/fcntl.h
@@ -16,7 +16,7 @@
16#define O_NOATIME 04000000 16#define O_NOATIME 04000000
17#define O_CLOEXEC 010000000 /* set close_on_exec */ 17#define O_CLOEXEC 010000000 /* set close_on_exec */
18/* 18/*
19 * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using 19 * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
20 * the O_SYNC flag. We continue to use the existing numerical value 20 * the O_SYNC flag. We continue to use the existing numerical value
21 * for O_DSYNC semantics now, but using the correct symbolic name for it. 21 * for O_DSYNC semantics now, but using the correct symbolic name for it.
22 * This new value is used to request true Posix O_SYNC semantics. It is 22 * This new value is used to request true Posix O_SYNC semantics. It is
diff --git a/arch/mips/include/asm/fcntl.h b/arch/mips/include/asm/fcntl.h
index 7c6681aa2ab8..e482fe90fe88 100644
--- a/arch/mips/include/asm/fcntl.h
+++ b/arch/mips/include/asm/fcntl.h
@@ -19,7 +19,7 @@
19#define FASYNC 0x1000 /* fcntl, for BSD compatibility */ 19#define FASYNC 0x1000 /* fcntl, for BSD compatibility */
20#define O_LARGEFILE 0x2000 /* allow large file opens */ 20#define O_LARGEFILE 0x2000 /* allow large file opens */
21/* 21/*
22 * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using 22 * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
23 * the O_SYNC flag. We continue to use the existing numerical value 23 * the O_SYNC flag. We continue to use the existing numerical value
24 * for O_DSYNC semantics now, but using the correct symbolic name for it. 24 * for O_DSYNC semantics now, but using the correct symbolic name for it.
25 * This new value is used to request true Posix O_SYNC semantics. It is 25 * This new value is used to request true Posix O_SYNC semantics. It is
diff --git a/arch/sparc/include/asm/fcntl.h b/arch/sparc/include/asm/fcntl.h
index 3b9cfb39175e..38f37b333cc7 100644
--- a/arch/sparc/include/asm/fcntl.h
+++ b/arch/sparc/include/asm/fcntl.h
@@ -19,7 +19,7 @@
19#define O_NOATIME 0x200000 19#define O_NOATIME 0x200000
20#define O_CLOEXEC 0x400000 20#define O_CLOEXEC 0x400000
21/* 21/*
22 * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using 22 * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
23 * the O_SYNC flag. We continue to use the existing numerical value 23 * the O_SYNC flag. We continue to use the existing numerical value
24 * for O_DSYNC semantics now, but using the correct symbolic name for it. 24 * for O_DSYNC semantics now, but using the correct symbolic name for it.
25 * This new value is used to request true Posix O_SYNC semantics. It is 25 * This new value is used to request true Posix O_SYNC semantics. It is
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 94f5110c4655..2c994591f4d7 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
35 mnt); 35 mnt);
36} 36}
37 37
38static int anon_inodefs_delete_dentry(struct dentry *dentry) 38/*
39 * anon_inodefs_dname() is called from d_path().
40 */
41static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
39{ 42{
40 /* 43 return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
41 * We faked vfs to believe the dentry was hashed when we created it. 44 dentry->d_name.name);
42 * Now we restore the flag so that dput() will work correctly.
43 */
44 dentry->d_flags |= DCACHE_UNHASHED;
45 return 1;
46} 45}
47 46
48static struct file_system_type anon_inode_fs_type = { 47static struct file_system_type anon_inode_fs_type = {
@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
51 .kill_sb = kill_anon_super, 50 .kill_sb = kill_anon_super,
52}; 51};
53static const struct dentry_operations anon_inodefs_dentry_operations = { 52static const struct dentry_operations anon_inodefs_dentry_operations = {
54 .d_delete = anon_inodefs_delete_dentry, 53 .d_dname = anon_inodefs_dname,
55}; 54};
56 55
57/* 56/*
@@ -119,8 +118,6 @@ struct file *anon_inode_getfile(const char *name,
119 atomic_inc(&anon_inode_inode->i_count); 118 atomic_inc(&anon_inode_inode->i_count);
120 119
121 path.dentry->d_op = &anon_inodefs_dentry_operations; 120 path.dentry->d_op = &anon_inodefs_dentry_operations;
122 /* Do not publish this dentry inside the global dentry hash table */
123 path.dentry->d_flags &= ~DCACHE_UNHASHED;
124 d_instantiate(path.dentry, anon_inode_inode); 121 d_instantiate(path.dentry, anon_inode_inode);
125 122
126 error = -ENFILE; 123 error = -ENFILE;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4012885d027f..e82adc2debb7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1206,7 +1206,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1206 * NOTE: filesystems with their own locking have to handle this 1206 * NOTE: filesystems with their own locking have to handle this
1207 * on their own. 1207 * on their own.
1208 */ 1208 */
1209 if (dio->flags & DIO_LOCKING) { 1209 if (flags & DIO_LOCKING) {
1210 if (unlikely((rw & WRITE) && retval < 0)) { 1210 if (unlikely((rw & WRITE) && retval < 0)) {
1211 loff_t isize = i_size_read(inode); 1211 loff_t isize = i_size_read(inode);
1212 if (end > isize) 1212 if (end > isize)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75bc..8f006a0d6076 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
62 struct inode *lower_inode = 62 struct inode *lower_inode =
63 ecryptfs_inode_to_lower(dentry->d_inode); 63 ecryptfs_inode_to_lower(dentry->d_inode);
64 64
65 fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL); 65 fsstack_copy_attr_all(dentry->d_inode, lower_inode);
66 } 66 }
67out: 67out:
68 return rc; 68 return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0de..429ca0b3ba08 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
626 lower_new_dir_dentry->d_inode, lower_new_dentry); 626 lower_new_dir_dentry->d_inode, lower_new_dentry);
627 if (rc) 627 if (rc)
628 goto out_lock; 628 goto out_lock;
629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL); 629 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
630 if (new_dir != old_dir) 630 if (new_dir != old_dir)
631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL); 631 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
632out_lock: 632out_lock:
633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 633 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
634 dput(lower_new_dentry->d_parent); 634 dput(lower_new_dentry->d_parent);
@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
967 rc = notify_change(lower_dentry, ia); 967 rc = notify_change(lower_dentry, ia);
968 mutex_unlock(&lower_dentry->d_inode->i_mutex); 968 mutex_unlock(&lower_dentry->d_inode->i_mutex);
969out: 969out:
970 fsstack_copy_attr_all(inode, lower_inode, NULL); 970 fsstack_copy_attr_all(inode, lower_inode);
971 return rc; 971 return rc;
972} 972}
973 973
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 101fe4c7b1ee..567bc4b9f70a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
189 init_special_inode(inode, lower_inode->i_mode, 189 init_special_inode(inode, lower_inode->i_mode,
190 lower_inode->i_rdev); 190 lower_inode->i_rdev);
191 dentry->d_op = &ecryptfs_dops; 191 dentry->d_op = &ecryptfs_dops;
192 fsstack_copy_attr_all(inode, lower_inode, NULL); 192 fsstack_copy_attr_all(inode, lower_inode);
193 /* This size will be overwritten for real files w/ headers and 193 /* This size will be overwritten for real files w/ headers and
194 * other metadata */ 194 * other metadata */
195 fsstack_copy_inode_size(inode, lower_inode); 195 fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3ff32fa793da..6e220f4eee7d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 125 * directory entry when gfs2_inode_lookup() is invoked. Part of the code
126 * segment inside gfs2_inode_lookup code needs to get moved around. 126 * segment inside gfs2_inode_lookup code needs to get moved around.
127 * 127 *
128 * Clean up I_LOCK and I_NEW as well. 128 * Clears I_NEW as well.
129 **/ 129 **/
130 130
131void gfs2_set_iop(struct inode *inode) 131void gfs2_set_iop(struct inode *inode)
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de611..03dfeb2e3928 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
113 * Prevent speculative execution through spin_unlock(&inode_lock); 113 * Prevent speculative execution through spin_unlock(&inode_lock);
114 */ 114 */
115 smp_mb(); 115 smp_mb();
116 wake_up_bit(&inode->i_state, __I_LOCK); 116 wake_up_bit(&inode->i_state, __I_NEW);
117} 117}
118 118
119/** 119/**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
690 } 690 }
691#endif 691#endif
692 /* 692 /*
693 * This is special! We do not need the spinlock when clearing I_LOCK, 693 * This is special! We do not need the spinlock when clearing I_NEW,
694 * because we're guaranteed that nobody else tries to do anything about 694 * because we're guaranteed that nobody else tries to do anything about
695 * the state of the inode when it is locked, as we just created it (so 695 * the state of the inode when it is locked, as we just created it (so
696 * there can be no old holders that haven't tested I_LOCK). 696 * there can be no old holders that haven't tested I_NEW).
697 * However we must emit the memory barrier so that other CPUs reliably 697 * However we must emit the memory barrier so that other CPUs reliably
698 * see the clearing of I_LOCK after the other inode initialisation has 698 * see the clearing of I_NEW after the other inode initialisation has
699 * completed. 699 * completed.
700 */ 700 */
701 smp_mb(); 701 smp_mb();
702 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW)); 702 WARN_ON(!(inode->i_state & I_NEW));
703 inode->i_state &= ~(I_LOCK|I_NEW); 703 inode->i_state &= ~I_NEW;
704 wake_up_inode(inode); 704 wake_up_inode(inode);
705} 705}
706EXPORT_SYMBOL(unlock_new_inode); 706EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
731 goto set_failed; 731 goto set_failed;
732 732
733 __inode_add_to_lists(sb, head, inode); 733 __inode_add_to_lists(sb, head, inode);
734 inode->i_state = I_LOCK|I_NEW; 734 inode->i_state = I_NEW;
735 spin_unlock(&inode_lock); 735 spin_unlock(&inode_lock);
736 736
737 /* Return the locked inode with I_NEW set, the 737 /* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
778 if (!old) { 778 if (!old) {
779 inode->i_ino = ino; 779 inode->i_ino = ino;
780 __inode_add_to_lists(sb, head, inode); 780 __inode_add_to_lists(sb, head, inode);
781 inode->i_state = I_LOCK|I_NEW; 781 inode->i_state = I_NEW;
782 spin_unlock(&inode_lock); 782 spin_unlock(&inode_lock);
783 783
784 /* Return the locked inode with I_NEW set, the 784 /* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
1083 ino_t ino = inode->i_ino; 1083 ino_t ino = inode->i_ino;
1084 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1084 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1085 1085
1086 inode->i_state |= I_LOCK|I_NEW; 1086 inode->i_state |= I_NEW;
1087 while (1) { 1087 while (1) {
1088 struct hlist_node *node; 1088 struct hlist_node *node;
1089 struct inode *old = NULL; 1089 struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1120 struct super_block *sb = inode->i_sb; 1120 struct super_block *sb = inode->i_sb;
1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1121 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1122 1122
1123 inode->i_state |= I_LOCK|I_NEW; 1123 inode->i_state |= I_NEW;
1124 1124
1125 while (1) { 1125 while (1) {
1126 struct hlist_node *node; 1126 struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
1510 * until the deletion _might_ have completed. Callers are responsible 1510 * until the deletion _might_ have completed. Callers are responsible
1511 * to recheck inode state. 1511 * to recheck inode state.
1512 * 1512 *
1513 * It doesn't matter if I_LOCK is not set initially, a call to 1513 * It doesn't matter if I_NEW is not set initially, a call to
1514 * wake_up_inode() after removing from the hash list will DTRT. 1514 * wake_up_inode() after removing from the hash list will DTRT.
1515 * 1515 *
1516 * This is called with inode_lock held. 1516 * This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
1518static void __wait_on_freeing_inode(struct inode *inode) 1518static void __wait_on_freeing_inode(struct inode *inode)
1519{ 1519{
1520 wait_queue_head_t *wq; 1520 wait_queue_head_t *wq;
1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); 1521 DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
1522 wq = bit_waitqueue(&inode->i_state, __I_LOCK); 1522 wq = bit_waitqueue(&inode->i_state, __I_NEW);
1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); 1523 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
1524 spin_unlock(&inode_lock); 1524 spin_unlock(&inode_lock);
1525 schedule(); 1525 schedule();
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada5..d945ea76b445 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1292 */ 1292 */
1293 /* 1293 /*
1294 * I believe this code is no longer needed. Splitting I_LOCK 1294 * I believe this code is no longer needed. Splitting I_LOCK
1295 * into two bits, I_LOCK and I_SYNC should prevent this 1295 * into two bits, I_NEW and I_SYNC should prevent this
1296 * deadlock as well. But since I don't have a JFS testload 1296 * deadlock as well. But since I don't have a JFS testload
1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. 1297 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
1298 * Joern 1298 * Joern
diff --git a/fs/namei.c b/fs/namei.c
index d2783c8a770b..dad4b80257db 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1764,7 +1764,7 @@ do_last:
1764 1764
1765 path_to_nameidata(&path, &nd); 1765 path_to_nameidata(&path, &nd);
1766 error = -EISDIR; 1766 error = -EISDIR;
1767 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1767 if (S_ISDIR(path.dentry->d_inode->i_mode))
1768 goto exit; 1768 goto exit;
1769ok: 1769ok:
1770 /* 1770 /*
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762cc..dc2505abb6d7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
530 * the ntfs inode. 530 * the ntfs inode.
531 * 531 *
532 * Q: What locks are held when the function is called? 532 * Q: What locks are held when the function is called?
533 * A: i_state has I_LOCK set, hence the inode is locked, also 533 * A: i_state has I_NEW set, hence the inode is locked, also
534 * i_count is set to 1, so it is not going to go away 534 * i_count is set to 1, so it is not going to go away
535 * i_flags is set to 0 and we have no business touching it. Only an ioctl() 535 * i_flags is set to 0 and we have no business touching it. Only an ioctl()
536 * is allowed to write to them. We should of course be honouring them but 536 * is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
1207 * necessary fields in @vi as well as initializing the ntfs inode. 1207 * necessary fields in @vi as well as initializing the ntfs inode.
1208 * 1208 *
1209 * Q: What locks are held when the function is called? 1209 * Q: What locks are held when the function is called?
1210 * A: i_state has I_LOCK set, hence the inode is locked, also 1210 * A: i_state has I_NEW set, hence the inode is locked, also
1211 * i_count is set to 1, so it is not going to go away 1211 * i_count is set to 1, so it is not going to go away
1212 * 1212 *
1213 * Return 0 on success and -errno on error. In the error case, the inode will 1213 * Return 0 on success and -errno on error. In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
1474 * normal directory inodes. 1474 * normal directory inodes.
1475 * 1475 *
1476 * Q: What locks are held when the function is called? 1476 * Q: What locks are held when the function is called?
1477 * A: i_state has I_LOCK set, hence the inode is locked, also 1477 * A: i_state has I_NEW set, hence the inode is locked, also
1478 * i_count is set to 1, so it is not going to go away 1478 * i_count is set to 1, so it is not going to go away
1479 * 1479 *
1480 * Return 0 on success and -errno on error. In the error case, the inode will 1480 * Return 0 on success and -errno on error. In the error case, the inode will
diff --git a/fs/pipe.c b/fs/pipe.c
index 43d79da5c57e..37ba29ff3158 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
906} 906}
907 907
908static struct vfsmount *pipe_mnt __read_mostly; 908static struct vfsmount *pipe_mnt __read_mostly;
909static int pipefs_delete_dentry(struct dentry *dentry)
910{
911 /*
912 * At creation time, we pretended this dentry was hashed
913 * (by clearing DCACHE_UNHASHED bit in d_flags)
914 * At delete time, we restore the truth : not hashed.
915 * (so that dput() can proceed correctly)
916 */
917 dentry->d_flags |= DCACHE_UNHASHED;
918 return 0;
919}
920 909
921/* 910/*
922 * pipefs_dname() is called from d_path(). 911 * pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
928} 917}
929 918
930static const struct dentry_operations pipefs_dentry_operations = { 919static const struct dentry_operations pipefs_dentry_operations = {
931 .d_delete = pipefs_delete_dentry,
932 .d_dname = pipefs_dname, 920 .d_dname = pipefs_dname,
933}; 921};
934 922
@@ -989,12 +977,6 @@ struct file *create_write_pipe(int flags)
989 path.mnt = mntget(pipe_mnt); 977 path.mnt = mntget(pipe_mnt);
990 978
991 path.dentry->d_op = &pipefs_dentry_operations; 979 path.dentry->d_op = &pipefs_dentry_operations;
992 /*
993 * We dont want to publish this dentry into global dentry hash table.
994 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
995 * This permits a working /proc/$pid/fd/XXX on pipes
996 */
997 path.dentry->d_flags &= ~DCACHE_UNHASHED;
998 d_instantiate(path.dentry, inode); 980 d_instantiate(path.dentry, inode);
999 981
1000 err = -ENFILE; 982 err = -ENFILE;
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4a..4a6f7f440658 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,18 +7,63 @@
7 * This function cannot be inlined since i_size_{read,write} is rather 7 * This function cannot be inlined since i_size_{read,write} is rather
8 * heavy-weight on 32-bit systems 8 * heavy-weight on 32-bit systems
9 */ 9 */
10void fsstack_copy_inode_size(struct inode *dst, const struct inode *src) 10void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
11{ 11{
12 i_size_write(dst, i_size_read((struct inode *)src)); 12 loff_t i_size;
13 dst->i_blocks = src->i_blocks; 13 blkcnt_t i_blocks;
14
15 /*
16 * i_size_read() includes its own seqlocking and protection from
17 * preemption (see include/linux/fs.h): we need nothing extra for
18 * that here, and prefer to avoid nesting locks than attempt to keep
19 * i_size and i_blocks in sync together.
20 */
21 i_size = i_size_read(src);
22
23 /*
24 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
25 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
26 * though stat's generic_fillattr() doesn't bother, and we won't be
27 * applying quotas (where i_blocks does become important) at the
28 * upper level.
29 *
30 * We don't actually know what locking is used at the lower level;
31 * but if it's a filesystem that supports quotas, it will be using
32 * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
33 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
34 * holes; but its i_blocks cannot carry into the upper long without
35 * almost 2TB swap - let's ignore that case.
36 */
37 if (sizeof(i_blocks) > sizeof(long))
38 spin_lock(&src->i_lock);
39 i_blocks = src->i_blocks;
40 if (sizeof(i_blocks) > sizeof(long))
41 spin_unlock(&src->i_lock);
42
43 /*
44 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
45 * fsstack_copy_inode_size() to hold some lock around
46 * i_size_write(), otherwise i_size_read() may spin forever (see
47 * include/linux/fs.h). We don't necessarily hold i_mutex when this
48 * is called, so take i_lock for that case.
49 *
50 * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the
51 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
52 * for that case too, and do both at once by combining the tests.
53 *
54 * There is none of this locking overhead in the 64-bit case.
55 */
56 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
57 spin_lock(&dst->i_lock);
58 i_size_write(dst, i_size);
59 dst->i_blocks = i_blocks;
60 if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
61 spin_unlock(&dst->i_lock);
14} 62}
15EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); 63EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
16 64
17/* copy all attributes; get_nlinks is optional way to override the i_nlink 65/* copy all attributes */
18 * copying 66void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
19 */
20void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
21 int (*get_nlinks)(struct inode *))
22{ 67{
23 dest->i_mode = src->i_mode; 68 dest->i_mode = src->i_mode;
24 dest->i_uid = src->i_uid; 69 dest->i_uid = src->i_uid;
@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
29 dest->i_ctime = src->i_ctime; 74 dest->i_ctime = src->i_ctime;
30 dest->i_blkbits = src->i_blkbits; 75 dest->i_blkbits = src->i_blkbits;
31 dest->i_flags = src->i_flags; 76 dest->i_flags = src->i_flags;
32 77 dest->i_nlink = src->i_nlink;
33 /*
34 * Update the nlinks AFTER updating the above fields, because the
35 * get_links callback may depend on them.
36 */
37 if (!get_nlinks)
38 dest->i_nlink = src->i_nlink;
39 else
40 dest->i_nlink = (*get_nlinks)(dest);
41} 78}
42EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 79EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/sync.c b/fs/sync.c
index 36752a683481..418727a2a239 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
355{ 355{
356 int ret; 356 int ret;
357 struct file *file; 357 struct file *file;
358 struct address_space *mapping;
358 loff_t endbyte; /* inclusive */ 359 loff_t endbyte; /* inclusive */
359 int fput_needed; 360 int fput_needed;
360 umode_t i_mode; 361 umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
405 !S_ISLNK(i_mode)) 406 !S_ISLNK(i_mode))
406 goto out_put; 407 goto out_put;
407 408
408 ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags); 409 mapping = file->f_mapping;
410 if (!mapping) {
411 ret = -EINVAL;
412 goto out_put;
413 }
414
415 ret = 0;
416 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
417 ret = filemap_fdatawait_range(mapping, offset, endbyte);
418 if (ret < 0)
419 goto out_put;
420 }
421
422 if (flags & SYNC_FILE_RANGE_WRITE) {
423 ret = filemap_fdatawrite_range(mapping, offset, endbyte);
424 if (ret < 0)
425 goto out_put;
426 }
427
428 if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
429 ret = filemap_fdatawait_range(mapping, offset, endbyte);
430
409out_put: 431out_put:
410 fput_light(file, fput_needed); 432 fput_light(file, fput_needed);
411out: 433out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
437} 459}
438SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); 460SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
439#endif 461#endif
440
441/*
442 * `endbyte' is inclusive
443 */
444int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
445 loff_t endbyte, unsigned int flags)
446{
447 int ret;
448
449 if (!mapping) {
450 ret = -EINVAL;
451 goto out;
452 }
453
454 ret = 0;
455 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
456 ret = filemap_fdatawait_range(mapping, offset, endbyte);
457 if (ret < 0)
458 goto out;
459 }
460
461 if (flags & SYNC_FILE_RANGE_WRITE) {
462 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
463 WB_SYNC_ALL);
464 if (ret < 0)
465 goto out;
466 }
467
468 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
469 ret = filemap_fdatawait_range(mapping, offset, endbyte);
470 }
471out:
472 return ret;
473}
474EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e72..16a6444330ec 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
45 * 45 *
46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the 46 * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> 47 * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
48 * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not 48 * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
49 * set as well. However, UBIFS disables readahead. 49 * set as well. However, UBIFS disables readahead.
50 */ 50 */
51 51
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298ba8b2..225946012d0b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
794 struct inode *inode = &ip->i_vnode; 794 struct inode *inode = &ip->i_vnode;
795 795
796 inode->i_ino = ip->i_ino; 796 inode->i_ino = ip->i_ino;
797 inode->i_state = I_NEW|I_LOCK; 797 inode->i_state = I_NEW;
798 inode_add_to_lists(ip->i_mount->m_super, inode); 798 inode_add_to_lists(ip->i_mount->m_super, inode);
799 799
800 inode->i_mode = ip->i_d.di_mode; 800 inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0de36c2a46f1..fa402a6bbbcf 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,7 @@ xfs_inode_alloc(
91 ip->i_new_size = 0; 91 ip->i_new_size = 0;
92 92
93 /* prevent anyone from using this yet */ 93 /* prevent anyone from using this yet */
94 VFS_I(ip)->i_state = I_NEW|I_LOCK; 94 VFS_I(ip)->i_state = I_NEW;
95 95
96 return ip; 96 return ip;
97} 97}
@@ -217,7 +217,7 @@ xfs_iget_cache_hit(
217 trace_xfs_iget_reclaim(ip); 217 trace_xfs_iget_reclaim(ip);
218 goto out_error; 218 goto out_error;
219 } 219 }
220 inode->i_state = I_LOCK|I_NEW; 220 inode->i_state = I_NEW;
221 } else { 221 } else {
222 /* If the VFS inode is being torn down, pause and try again. */ 222 /* If the VFS inode is being torn down, pause and try again. */
223 if (!igrab(inode)) { 223 if (!igrab(inode)) {
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 681ddf3e844c..fcd268ce0674 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -51,7 +51,7 @@
51#endif 51#endif
52 52
53/* 53/*
54 * Before Linux 2.6.32 only O_DSYNC semantics were implemented, but using 54 * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
55 * the O_SYNC flag. We continue to use the existing numerical value 55 * the O_SYNC flag. We continue to use the existing numerical value
56 * for O_DSYNC semantics now, but using the correct symbolic name for it. 56 * for O_DSYNC semantics now, but using the correct symbolic name for it.
57 * This new value is used to request true Posix O_SYNC semantics. It is 57 * This new value is used to request true Posix O_SYNC semantics. It is
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 66bc0a54b284..cca191933ff6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,10 +1095,6 @@ struct file_lock {
1095 1095
1096extern void send_sigio(struct fown_struct *fown, int fd, int band); 1096extern void send_sigio(struct fown_struct *fown, int fd, int band);
1097 1097
1098/* fs/sync.c */
1099extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
1100 loff_t endbyte, unsigned int flags);
1101
1102#ifdef CONFIG_FILE_LOCKING 1098#ifdef CONFIG_FILE_LOCKING
1103extern int fcntl_getlk(struct file *, struct flock __user *); 1099extern int fcntl_getlk(struct file *, struct flock __user *);
1104extern int fcntl_setlk(unsigned int, struct file *, unsigned int, 1100extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
@@ -1591,7 +1587,7 @@ struct super_operations {
1591 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at 1587 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
1592 * various stages of removing an inode. 1588 * various stages of removing an inode.
1593 * 1589 *
1594 * Two bits are used for locking and completion notification, I_LOCK and I_SYNC. 1590 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
1595 * 1591 *
1596 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on 1592 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
1597 * fdatasync(). i_atime is the usual cause. 1593 * fdatasync(). i_atime is the usual cause.
@@ -1600,8 +1596,14 @@ struct super_operations {
1600 * don't have to write inode on fdatasync() when only 1596 * don't have to write inode on fdatasync() when only
1601 * mtime has changed in it. 1597 * mtime has changed in it.
1602 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. 1598 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
1603 * I_NEW get_new_inode() sets i_state to I_LOCK|I_NEW. Both 1599 * I_NEW Serves as both a mutex and completion notification.
1604 * are cleared by unlock_new_inode(), called from iget(). 1600 * New inodes set I_NEW. If two processes both create
1601 * the same inode, one of them will release its inode and
1602 * wait for I_NEW to be released before returning.
1603 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
1604 * also cause waiting on I_NEW, without I_NEW actually
1605 * being set. find_inode() uses this to prevent returning
1606 * nearly-dead inodes.
1605 * I_WILL_FREE Must be set when calling write_inode_now() if i_count 1607 * I_WILL_FREE Must be set when calling write_inode_now() if i_count
1606 * is zero. I_FREEING must be set when I_WILL_FREE is 1608 * is zero. I_FREEING must be set when I_WILL_FREE is
1607 * cleared. 1609 * cleared.
@@ -1615,20 +1617,11 @@ struct super_operations {
1615 * prohibited for many purposes. iget() must wait for 1617 * prohibited for many purposes. iget() must wait for
1616 * the inode to be completely released, then create it 1618 * the inode to be completely released, then create it
1617 * anew. Other functions will just ignore such inodes, 1619 * anew. Other functions will just ignore such inodes,
1618 * if appropriate. I_LOCK is used for waiting. 1620 * if appropriate. I_NEW is used for waiting.
1619 * 1621 *
1620 * I_LOCK Serves as both a mutex and completion notification. 1622 * I_SYNC Synchronized write of dirty inode data. The bit is
1621 * New inodes set I_LOCK. If two processes both create 1623 * set during data writeback, and cleared with a wakeup
1622 * the same inode, one of them will release its inode and 1624 * on the bit address once it is done.
1623 * wait for I_LOCK to be released before returning.
1624 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
1625 * also cause waiting on I_LOCK, without I_LOCK actually
1626 * being set. find_inode() uses this to prevent returning
1627 * nearly-dead inodes.
1628 * I_SYNC Similar to I_LOCK, but limited in scope to writeback
1629 * of inode dirty data. Having a separate lock for this
1630 * purpose reduces latency and prevents some filesystem-
1631 * specific deadlocks.
1632 * 1625 *
1633 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1626 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1634 * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on 1627 * Q: igrab() only checks on (I_FREEING|I_WILL_FREE). Should it also check on
@@ -1637,13 +1630,12 @@ struct super_operations {
1637#define I_DIRTY_SYNC 1 1630#define I_DIRTY_SYNC 1
1638#define I_DIRTY_DATASYNC 2 1631#define I_DIRTY_DATASYNC 2
1639#define I_DIRTY_PAGES 4 1632#define I_DIRTY_PAGES 4
1640#define I_NEW 8 1633#define __I_NEW 3
1634#define I_NEW (1 << __I_NEW)
1641#define I_WILL_FREE 16 1635#define I_WILL_FREE 16
1642#define I_FREEING 32 1636#define I_FREEING 32
1643#define I_CLEAR 64 1637#define I_CLEAR 64
1644#define __I_LOCK 7 1638#define __I_SYNC 7
1645#define I_LOCK (1 << __I_LOCK)
1646#define __I_SYNC 8
1647#define I_SYNC (1 << __I_SYNC) 1639#define I_SYNC (1 << __I_SYNC)
1648 1640
1649#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1641#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index bb516ceeefc9..da317c7163ab 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -8,10 +8,8 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9 9
10/* externs for fs/stack.c */ 10/* externs for fs/stack.c */
11extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, 11extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
12 int (*get_nlinks)(struct inode *)); 12extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src);
13
14extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
15 13
16/* inlines */ 14/* inlines */
17static inline void fsstack_copy_attr_atime(struct inode *dest, 15static inline void fsstack_copy_attr_atime(struct inode *dest,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 705f01fe413a..c18c008f4bbf 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages);
79static inline void wait_on_inode(struct inode *inode) 79static inline void wait_on_inode(struct inode *inode)
80{ 80{
81 might_sleep(); 81 might_sleep();
82 wait_on_bit(&inode->i_state, __I_LOCK, inode_wait, 82 wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
83 TASK_UNINTERRUPTIBLE);
84} 83}
85static inline void inode_sync_wait(struct inode *inode) 84static inline void inode_sync_wait(struct inode *inode)
86{ 85{
diff --git a/net/socket.c b/net/socket.c
index dbfdfa96d29b..769c386bd428 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -312,18 +312,6 @@ static struct file_system_type sock_fs_type = {
312 .kill_sb = kill_anon_super, 312 .kill_sb = kill_anon_super,
313}; 313};
314 314
315static int sockfs_delete_dentry(struct dentry *dentry)
316{
317 /*
318 * At creation time, we pretended this dentry was hashed
319 * (by clearing DCACHE_UNHASHED bit in d_flags)
320 * At delete time, we restore the truth : not hashed.
321 * (so that dput() can proceed correctly)
322 */
323 dentry->d_flags |= DCACHE_UNHASHED;
324 return 0;
325}
326
327/* 315/*
328 * sockfs_dname() is called from d_path(). 316 * sockfs_dname() is called from d_path().
329 */ 317 */
@@ -334,7 +322,6 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
334} 322}
335 323
336static const struct dentry_operations sockfs_dentry_operations = { 324static const struct dentry_operations sockfs_dentry_operations = {
337 .d_delete = sockfs_delete_dentry,
338 .d_dname = sockfs_dname, 325 .d_dname = sockfs_dname,
339}; 326};
340 327
@@ -374,12 +361,6 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
374 path.mnt = mntget(sock_mnt); 361 path.mnt = mntget(sock_mnt);
375 362
376 path.dentry->d_op = &sockfs_dentry_operations; 363 path.dentry->d_op = &sockfs_dentry_operations;
377 /*
378 * We dont want to push this dentry into global dentry hash table.
379 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
380 * This permits a working /proc/$pid/fd/XXX on sockets
381 */
382 path.dentry->d_flags &= ~DCACHE_UNHASHED;
383 d_instantiate(path.dentry, SOCK_INODE(sock)); 364 d_instantiate(path.dentry, SOCK_INODE(sock));
384 SOCK_INODE(sock)->i_fop = &socket_file_ops; 365 SOCK_INODE(sock)->i_fop = &socket_file_ops;
385 366