aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-08-18 12:35:08 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-18 12:35:08 -0400
commit145c3ae46b37993b0debb0b3da6256daea4a6ec5 (patch)
tree0dbff382ce36b23b3d2dbff87d3eaab73a07a2a4
parent81ca03a0e2ea0207b2df80e0edcf4c775c07a505 (diff)
parent99b7db7b8ffd6bb755eb0a175596421a0b581cb2 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6: fs: brlock vfsmount_lock fs: scale files_lock lglock: introduce special lglock and brlock spin locks tty: fix fu_list abuse fs: cleanup files_lock locking fs: remove extra lookup in __lookup_hash fs: fs_struct rwlock to spinlock apparmor: use task path helpers fs: dentry allocation consolidation fs: fix do_lookup false negative mbcache: Limit the maximum number of cache entries hostfs ->follow_link() braino hostfs: dumb (and usually harmless) tpyo - strncpy instead of strlcpy remove SWRITE* I/O types kill BH_Ordered flag vfs: update ctime when changing the file's permission by setfacl cramfs: only unlock new inodes fix reiserfs_evict_inode end_writeback second call
-rw-r--r--drivers/char/pty.c4
-rw-r--r--drivers/char/tty_io.c92
-rw-r--r--drivers/staging/pohmelfs/path_entry.c8
-rw-r--r--fs/buffer.c69
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/dcache.c71
-rw-r--r--fs/exec.c4
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/file_table.c124
-rw-r--r--fs/fs_struct.c32
-rw-r--r--fs/generic_acl.c1
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/internal.h7
-rw-r--r--fs/jbd/checkpoint.c4
-rw-r--r--fs/jbd/commit.c49
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd/revoke.c2
-rw-r--r--fs/jbd2/checkpoint.c4
-rw-r--r--fs/jbd2/commit.c39
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jbd2/revoke.c2
-rw-r--r--fs/mbcache.c30
-rw-r--r--fs/namei.c119
-rw-r--r--fs/namespace.c177
-rw-r--r--fs/nilfs2/super.c28
-rw-r--r--fs/open.c4
-rw-r--r--fs/pnode.c11
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/super.c18
-rw-r--r--fs/ufs/balloc.c24
-rw-r--r--fs/ufs/ialloc.c18
-rw-r--r--fs/ufs/truncate.c18
-rw-r--r--fs/ufs/util.c20
-rw-r--r--fs/ufs/util.h3
-rw-r--r--include/linux/buffer_head.h4
-rw-r--r--include/linux/fs.h21
-rw-r--r--include/linux/fs_struct.h14
-rw-r--r--include/linux/lglock.h172
-rw-r--r--include/linux/tty.h9
-rw-r--r--kernel/fork.c10
-rw-r--r--security/apparmor/path.c9
-rw-r--r--security/selinux/hooks.c9
43 files changed, 797 insertions, 450 deletions
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index ad46eae1f9bb..c350d01716bd 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -675,8 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp)
675 } 675 }
676 676
677 set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ 677 set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
678 filp->private_data = tty; 678
679 file_move(filp, &tty->tty_files); 679 tty_add_file(tty, filp);
680 680
681 retval = devpts_pty_new(inode, tty->link); 681 retval = devpts_pty_new(inode, tty->link);
682 if (retval) 682 if (retval)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 0350c42375a2..949067a0bd47 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
136DEFINE_MUTEX(tty_mutex); 136DEFINE_MUTEX(tty_mutex);
137EXPORT_SYMBOL(tty_mutex); 137EXPORT_SYMBOL(tty_mutex);
138 138
139/* Spinlock to protect the tty->tty_files list */
140DEFINE_SPINLOCK(tty_files_lock);
141
139static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); 142static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
140static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); 143static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
141ssize_t redirected_tty_write(struct file *, const char __user *, 144ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -185,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty)
185 kfree(tty); 188 kfree(tty);
186} 189}
187 190
191static inline struct tty_struct *file_tty(struct file *file)
192{
193 return ((struct tty_file_private *)file->private_data)->tty;
194}
195
196/* Associate a new file with the tty structure */
197void tty_add_file(struct tty_struct *tty, struct file *file)
198{
199 struct tty_file_private *priv;
200
201 /* XXX: must implement proper error handling in callers */
202 priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL);
203
204 priv->tty = tty;
205 priv->file = file;
206 file->private_data = priv;
207
208 spin_lock(&tty_files_lock);
209 list_add(&priv->list, &tty->tty_files);
210 spin_unlock(&tty_files_lock);
211}
212
213/* Delete file from its tty */
214void tty_del_file(struct file *file)
215{
216 struct tty_file_private *priv = file->private_data;
217
218 spin_lock(&tty_files_lock);
219 list_del(&priv->list);
220 spin_unlock(&tty_files_lock);
221 file->private_data = NULL;
222 kfree(priv);
223}
224
225
188#define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) 226#define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
189 227
190/** 228/**
@@ -235,11 +273,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
235 struct list_head *p; 273 struct list_head *p;
236 int count = 0; 274 int count = 0;
237 275
238 file_list_lock(); 276 spin_lock(&tty_files_lock);
239 list_for_each(p, &tty->tty_files) { 277 list_for_each(p, &tty->tty_files) {
240 count++; 278 count++;
241 } 279 }
242 file_list_unlock(); 280 spin_unlock(&tty_files_lock);
243 if (tty->driver->type == TTY_DRIVER_TYPE_PTY && 281 if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
244 tty->driver->subtype == PTY_TYPE_SLAVE && 282 tty->driver->subtype == PTY_TYPE_SLAVE &&
245 tty->link && tty->link->count) 283 tty->link && tty->link->count)
@@ -497,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty)
497 struct file *cons_filp = NULL; 535 struct file *cons_filp = NULL;
498 struct file *filp, *f = NULL; 536 struct file *filp, *f = NULL;
499 struct task_struct *p; 537 struct task_struct *p;
538 struct tty_file_private *priv;
500 int closecount = 0, n; 539 int closecount = 0, n;
501 unsigned long flags; 540 unsigned long flags;
502 int refs = 0; 541 int refs = 0;
@@ -506,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty)
506 545
507 546
508 spin_lock(&redirect_lock); 547 spin_lock(&redirect_lock);
509 if (redirect && redirect->private_data == tty) { 548 if (redirect && file_tty(redirect) == tty) {
510 f = redirect; 549 f = redirect;
511 redirect = NULL; 550 redirect = NULL;
512 } 551 }
@@ -519,9 +558,10 @@ void __tty_hangup(struct tty_struct *tty)
519 workqueue with the lock held */ 558 workqueue with the lock held */
520 check_tty_count(tty, "tty_hangup"); 559 check_tty_count(tty, "tty_hangup");
521 560
522 file_list_lock(); 561 spin_lock(&tty_files_lock);
523 /* This breaks for file handles being sent over AF_UNIX sockets ? */ 562 /* This breaks for file handles being sent over AF_UNIX sockets ? */
524 list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { 563 list_for_each_entry(priv, &tty->tty_files, list) {
564 filp = priv->file;
525 if (filp->f_op->write == redirected_tty_write) 565 if (filp->f_op->write == redirected_tty_write)
526 cons_filp = filp; 566 cons_filp = filp;
527 if (filp->f_op->write != tty_write) 567 if (filp->f_op->write != tty_write)
@@ -530,7 +570,7 @@ void __tty_hangup(struct tty_struct *tty)
530 __tty_fasync(-1, filp, 0); /* can't block */ 570 __tty_fasync(-1, filp, 0); /* can't block */
531 filp->f_op = &hung_up_tty_fops; 571 filp->f_op = &hung_up_tty_fops;
532 } 572 }
533 file_list_unlock(); 573 spin_unlock(&tty_files_lock);
534 574
535 tty_ldisc_hangup(tty); 575 tty_ldisc_hangup(tty);
536 576
@@ -889,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
889 loff_t *ppos) 929 loff_t *ppos)
890{ 930{
891 int i; 931 int i;
892 struct tty_struct *tty; 932 struct inode *inode = file->f_path.dentry->d_inode;
893 struct inode *inode; 933 struct tty_struct *tty = file_tty(file);
894 struct tty_ldisc *ld; 934 struct tty_ldisc *ld;
895 935
896 tty = file->private_data;
897 inode = file->f_path.dentry->d_inode;
898 if (tty_paranoia_check(tty, inode, "tty_read")) 936 if (tty_paranoia_check(tty, inode, "tty_read"))
899 return -EIO; 937 return -EIO;
900 if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) 938 if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
@@ -1065,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg)
1065static ssize_t tty_write(struct file *file, const char __user *buf, 1103static ssize_t tty_write(struct file *file, const char __user *buf,
1066 size_t count, loff_t *ppos) 1104 size_t count, loff_t *ppos)
1067{ 1105{
1068 struct tty_struct *tty;
1069 struct inode *inode = file->f_path.dentry->d_inode; 1106 struct inode *inode = file->f_path.dentry->d_inode;
1107 struct tty_struct *tty = file_tty(file);
1108 struct tty_ldisc *ld;
1070 ssize_t ret; 1109 ssize_t ret;
1071 struct tty_ldisc *ld;
1072 1110
1073 tty = file->private_data;
1074 if (tty_paranoia_check(tty, inode, "tty_write")) 1111 if (tty_paranoia_check(tty, inode, "tty_write"))
1075 return -EIO; 1112 return -EIO;
1076 if (!tty || !tty->ops->write || 1113 if (!tty || !tty->ops->write ||
@@ -1424,9 +1461,9 @@ static void release_one_tty(struct work_struct *work)
1424 tty_driver_kref_put(driver); 1461 tty_driver_kref_put(driver);
1425 module_put(driver->owner); 1462 module_put(driver->owner);
1426 1463
1427 file_list_lock(); 1464 spin_lock(&tty_files_lock);
1428 list_del_init(&tty->tty_files); 1465 list_del_init(&tty->tty_files);
1429 file_list_unlock(); 1466 spin_unlock(&tty_files_lock);
1430 1467
1431 put_pid(tty->pgrp); 1468 put_pid(tty->pgrp);
1432 put_pid(tty->session); 1469 put_pid(tty->session);
@@ -1507,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx)
1507 1544
1508int tty_release(struct inode *inode, struct file *filp) 1545int tty_release(struct inode *inode, struct file *filp)
1509{ 1546{
1510 struct tty_struct *tty, *o_tty; 1547 struct tty_struct *tty = file_tty(filp);
1548 struct tty_struct *o_tty;
1511 int pty_master, tty_closing, o_tty_closing, do_sleep; 1549 int pty_master, tty_closing, o_tty_closing, do_sleep;
1512 int devpts; 1550 int devpts;
1513 int idx; 1551 int idx;
1514 char buf[64]; 1552 char buf[64];
1515 1553
1516 tty = filp->private_data;
1517 if (tty_paranoia_check(tty, inode, "tty_release_dev")) 1554 if (tty_paranoia_check(tty, inode, "tty_release_dev"))
1518 return 0; 1555 return 0;
1519 1556
@@ -1671,8 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp)
1671 * - do_tty_hangup no longer sees this file descriptor as 1708 * - do_tty_hangup no longer sees this file descriptor as
1672 * something that needs to be handled for hangups. 1709 * something that needs to be handled for hangups.
1673 */ 1710 */
1674 file_kill(filp); 1711 tty_del_file(filp);
1675 filp->private_data = NULL;
1676 1712
1677 /* 1713 /*
1678 * Perform some housekeeping before deciding whether to return. 1714 * Perform some housekeeping before deciding whether to return.
@@ -1839,8 +1875,8 @@ got_driver:
1839 return PTR_ERR(tty); 1875 return PTR_ERR(tty);
1840 } 1876 }
1841 1877
1842 filp->private_data = tty; 1878 tty_add_file(tty, filp);
1843 file_move(filp, &tty->tty_files); 1879
1844 check_tty_count(tty, "tty_open"); 1880 check_tty_count(tty, "tty_open");
1845 if (tty->driver->type == TTY_DRIVER_TYPE_PTY && 1881 if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
1846 tty->driver->subtype == PTY_TYPE_MASTER) 1882 tty->driver->subtype == PTY_TYPE_MASTER)
@@ -1916,11 +1952,10 @@ got_driver:
1916 1952
1917static unsigned int tty_poll(struct file *filp, poll_table *wait) 1953static unsigned int tty_poll(struct file *filp, poll_table *wait)
1918{ 1954{
1919 struct tty_struct *tty; 1955 struct tty_struct *tty = file_tty(filp);
1920 struct tty_ldisc *ld; 1956 struct tty_ldisc *ld;
1921 int ret = 0; 1957 int ret = 0;
1922 1958
1923 tty = filp->private_data;
1924 if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll")) 1959 if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
1925 return 0; 1960 return 0;
1926 1961
@@ -1933,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait)
1933 1968
1934static int __tty_fasync(int fd, struct file *filp, int on) 1969static int __tty_fasync(int fd, struct file *filp, int on)
1935{ 1970{
1936 struct tty_struct *tty; 1971 struct tty_struct *tty = file_tty(filp);
1937 unsigned long flags; 1972 unsigned long flags;
1938 int retval = 0; 1973 int retval = 0;
1939 1974
1940 tty = filp->private_data;
1941 if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync")) 1975 if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
1942 goto out; 1976 goto out;
1943 1977
@@ -2491,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty);
2491 */ 2525 */
2492long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2526long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2493{ 2527{
2494 struct tty_struct *tty, *real_tty; 2528 struct tty_struct *tty = file_tty(file);
2529 struct tty_struct *real_tty;
2495 void __user *p = (void __user *)arg; 2530 void __user *p = (void __user *)arg;
2496 int retval; 2531 int retval;
2497 struct tty_ldisc *ld; 2532 struct tty_ldisc *ld;
2498 struct inode *inode = file->f_dentry->d_inode; 2533 struct inode *inode = file->f_dentry->d_inode;
2499 2534
2500 tty = file->private_data;
2501 if (tty_paranoia_check(tty, inode, "tty_ioctl")) 2535 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
2502 return -EINVAL; 2536 return -EINVAL;
2503 2537
@@ -2619,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
2619 unsigned long arg) 2653 unsigned long arg)
2620{ 2654{
2621 struct inode *inode = file->f_dentry->d_inode; 2655 struct inode *inode = file->f_dentry->d_inode;
2622 struct tty_struct *tty = file->private_data; 2656 struct tty_struct *tty = file_tty(file);
2623 struct tty_ldisc *ld; 2657 struct tty_ldisc *ld;
2624 int retval = -ENOIOCTLCMD; 2658 int retval = -ENOIOCTLCMD;
2625 2659
@@ -2711,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty)
2711 if (!filp) 2745 if (!filp)
2712 continue; 2746 continue;
2713 if (filp->f_op->read == tty_read && 2747 if (filp->f_op->read == tty_read &&
2714 filp->private_data == tty) { 2748 file_tty(filp) == tty) {
2715 printk(KERN_NOTICE "SAK: killed process %d" 2749 printk(KERN_NOTICE "SAK: killed process %d"
2716 " (%s): fd#%d opened to the tty\n", 2750 " (%s): fd#%d opened to the tty\n",
2717 task_pid_nr(p), p->comm, i); 2751 task_pid_nr(p), p->comm, i);
diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index cdc4dd50d638..8ec83d2dffb7 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le
44 return -ENOENT; 44 return -ENOENT;
45 } 45 }
46 46
47 read_lock(&current->fs->lock); 47 spin_lock(&current->fs->lock);
48 path.mnt = mntget(current->fs->root.mnt); 48 path.mnt = mntget(current->fs->root.mnt);
49 read_unlock(&current->fs->lock); 49 spin_unlock(&current->fs->lock);
50 50
51 path.dentry = d; 51 path.dentry = d;
52 52
@@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
91 return -ENOENT; 91 return -ENOENT;
92 } 92 }
93 93
94 read_lock(&current->fs->lock); 94 spin_lock(&current->fs->lock);
95 root = dget(current->fs->root.dentry); 95 root = dget(current->fs->root.dentry);
96 read_unlock(&current->fs->lock); 96 spin_unlock(&current->fs->lock);
97 97
98 spin_lock(&dcache_lock); 98 spin_lock(&dcache_lock);
99 99
diff --git a/fs/buffer.c b/fs/buffer.c
index 50efa339e051..3e7dca279d1c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
770 spin_unlock(lock); 770 spin_unlock(lock);
771 /* 771 /*
772 * Ensure any pending I/O completes so that 772 * Ensure any pending I/O completes so that
773 * ll_rw_block() actually writes the current 773 * write_dirty_buffer() actually writes the
774 * contents - it is a noop if I/O is still in 774 * current contents - it is a noop if I/O is
775 * flight on potentially older contents. 775 * still in flight on potentially older
776 * contents.
776 */ 777 */
777 ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); 778 write_dirty_buffer(bh, WRITE_SYNC_PLUG);
778 779
779 /* 780 /*
780 * Kick off IO for the previous mapping. Note 781 * Kick off IO for the previous mapping. Note
@@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
2912 BUG_ON(buffer_unwritten(bh)); 2913 BUG_ON(buffer_unwritten(bh));
2913 2914
2914 /* 2915 /*
2915 * Mask in barrier bit for a write (could be either a WRITE or a
2916 * WRITE_SYNC
2917 */
2918 if (buffer_ordered(bh) && (rw & WRITE))
2919 rw |= WRITE_BARRIER;
2920
2921 /*
2922 * Only clear out a write error when rewriting 2916 * Only clear out a write error when rewriting
2923 */ 2917 */
2924 if (test_set_buffer_req(bh) && (rw & WRITE)) 2918 if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
2956 2950
2957/** 2951/**
2958 * ll_rw_block: low-level access to block devices (DEPRECATED) 2952 * ll_rw_block: low-level access to block devices (DEPRECATED)
2959 * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) 2953 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
2960 * @nr: number of &struct buffer_heads in the array 2954 * @nr: number of &struct buffer_heads in the array
2961 * @bhs: array of pointers to &struct buffer_head 2955 * @bhs: array of pointers to &struct buffer_head
2962 * 2956 *
2963 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and 2957 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
2964 * requests an I/O operation on them, either a %READ or a %WRITE. The third 2958 * requests an I/O operation on them, either a %READ or a %WRITE. The third
2965 * %SWRITE is like %WRITE only we make sure that the *current* data in buffers 2959 * %READA option is described in the documentation for generic_make_request()
2966 * are sent to disk. The fourth %READA option is described in the documentation 2960 * which ll_rw_block() calls.
2967 * for generic_make_request() which ll_rw_block() calls.
2968 * 2961 *
2969 * This function drops any buffer that it cannot get a lock on (with the 2962 * This function drops any buffer that it cannot get a lock on (with the
2970 * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be 2963 * BH_Lock state bit), any buffer that appears to be clean when doing a write
2971 * clean when doing a write request, and any buffer that appears to be 2964 * request, and any buffer that appears to be up-to-date when doing read
2972 * up-to-date when doing read request. Further it marks as clean buffers that 2965 * request. Further it marks as clean buffers that are processed for
2973 * are processed for writing (the buffer cache won't assume that they are 2966 * writing (the buffer cache won't assume that they are actually clean
2974 * actually clean until the buffer gets unlocked). 2967 * until the buffer gets unlocked).
2975 * 2968 *
2976 * ll_rw_block sets b_end_io to simple completion handler that marks 2969 * ll_rw_block sets b_end_io to simple completion handler that marks
2977 * the buffer up-to-date (if approriate), unlocks the buffer and wakes 2970 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2987 for (i = 0; i < nr; i++) { 2980 for (i = 0; i < nr; i++) {
2988 struct buffer_head *bh = bhs[i]; 2981 struct buffer_head *bh = bhs[i];
2989 2982
2990 if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) 2983 if (!trylock_buffer(bh))
2991 lock_buffer(bh);
2992 else if (!trylock_buffer(bh))
2993 continue; 2984 continue;
2994 2985 if (rw == WRITE) {
2995 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
2996 rw == SWRITE_SYNC_PLUG) {
2997 if (test_clear_buffer_dirty(bh)) { 2986 if (test_clear_buffer_dirty(bh)) {
2998 bh->b_end_io = end_buffer_write_sync; 2987 bh->b_end_io = end_buffer_write_sync;
2999 get_bh(bh); 2988 get_bh(bh);
3000 if (rw == SWRITE_SYNC) 2989 submit_bh(WRITE, bh);
3001 submit_bh(WRITE_SYNC, bh);
3002 else
3003 submit_bh(WRITE, bh);
3004 continue; 2990 continue;
3005 } 2991 }
3006 } else { 2992 } else {
@@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3016} 3002}
3017EXPORT_SYMBOL(ll_rw_block); 3003EXPORT_SYMBOL(ll_rw_block);
3018 3004
3005void write_dirty_buffer(struct buffer_head *bh, int rw)
3006{
3007 lock_buffer(bh);
3008 if (!test_clear_buffer_dirty(bh)) {
3009 unlock_buffer(bh);
3010 return;
3011 }
3012 bh->b_end_io = end_buffer_write_sync;
3013 get_bh(bh);
3014 submit_bh(rw, bh);
3015}
3016EXPORT_SYMBOL(write_dirty_buffer);
3017
3019/* 3018/*
3020 * For a data-integrity writeout, we need to wait upon any in-progress I/O 3019 * For a data-integrity writeout, we need to wait upon any in-progress I/O
3021 * and then start new I/O and then wait upon it. The caller must have a ref on 3020 * and then start new I/O and then wait upon it. The caller must have a ref on
3022 * the buffer_head. 3021 * the buffer_head.
3023 */ 3022 */
3024int sync_dirty_buffer(struct buffer_head *bh) 3023int __sync_dirty_buffer(struct buffer_head *bh, int rw)
3025{ 3024{
3026 int ret = 0; 3025 int ret = 0;
3027 3026
@@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
3030 if (test_clear_buffer_dirty(bh)) { 3029 if (test_clear_buffer_dirty(bh)) {
3031 get_bh(bh); 3030 get_bh(bh);
3032 bh->b_end_io = end_buffer_write_sync; 3031 bh->b_end_io = end_buffer_write_sync;
3033 ret = submit_bh(WRITE_SYNC, bh); 3032 ret = submit_bh(rw, bh);
3034 wait_on_buffer(bh); 3033 wait_on_buffer(bh);
3035 if (buffer_eopnotsupp(bh)) { 3034 if (buffer_eopnotsupp(bh)) {
3036 clear_buffer_eopnotsupp(bh); 3035 clear_buffer_eopnotsupp(bh);
@@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
3043 } 3042 }
3044 return ret; 3043 return ret;
3045} 3044}
3045EXPORT_SYMBOL(__sync_dirty_buffer);
3046
3047int sync_dirty_buffer(struct buffer_head *bh)
3048{
3049 return __sync_dirty_buffer(bh, WRITE_SYNC);
3050}
3046EXPORT_SYMBOL(sync_dirty_buffer); 3051EXPORT_SYMBOL(sync_dirty_buffer);
3047 3052
3048/* 3053/*
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a53b130b366c..1e7a33028d33 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
80 } 80 }
81 } else { 81 } else {
82 inode = iget_locked(sb, CRAMINO(cramfs_inode)); 82 inode = iget_locked(sb, CRAMINO(cramfs_inode));
83 if (inode) { 83 if (inode && (inode->i_state & I_NEW)) {
84 setup_inode(inode, cramfs_inode); 84 setup_inode(inode, cramfs_inode);
85 unlock_new_inode(inode); 85 unlock_new_inode(inode);
86 } 86 }
diff --git a/fs/dcache.c b/fs/dcache.c
index 4d13bf50b7b1..83293be48149 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci);
1332 * d_lookup - search for a dentry 1332 * d_lookup - search for a dentry
1333 * @parent: parent dentry 1333 * @parent: parent dentry
1334 * @name: qstr of name we wish to find 1334 * @name: qstr of name we wish to find
1335 * Returns: dentry, or NULL
1335 * 1336 *
1336 * Searches the children of the parent dentry for the name in question. If 1337 * d_lookup searches the children of the parent dentry for the name in
1337 * the dentry is found its reference count is incremented and the dentry 1338 * question. If the dentry is found its reference count is incremented and the
1338 * is returned. The caller must use dput to free the entry when it has 1339 * dentry is returned. The caller must use dput to free the entry when it has
1339 * finished using it. %NULL is returned on failure. 1340 * finished using it. %NULL is returned if the dentry does not exist.
1340 *
1341 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
1342 * Memory barriers are used while updating and doing lockless traversal.
1343 * To avoid races with d_move while rename is happening, d_lock is used.
1344 *
1345 * Overflows in memcmp(), while d_move, are avoided by keeping the length
1346 * and name pointer in one structure pointed by d_qstr.
1347 *
1348 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1349 * lookup is going on.
1350 *
1351 * The dentry unused LRU is not updated even if lookup finds the required dentry
1352 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1353 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1354 * acquisition.
1355 *
1356 * d_lookup() is protected against the concurrent renames in some unrelated
1357 * directory using the seqlockt_t rename_lock.
1358 */ 1341 */
1359
1360struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1342struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1361{ 1343{
1362 struct dentry * dentry = NULL; 1344 struct dentry * dentry = NULL;
@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1372} 1354}
1373EXPORT_SYMBOL(d_lookup); 1355EXPORT_SYMBOL(d_lookup);
1374 1356
1357/*
1358 * __d_lookup - search for a dentry (racy)
1359 * @parent: parent dentry
1360 * @name: qstr of name we wish to find
1361 * Returns: dentry, or NULL
1362 *
1363 * __d_lookup is like d_lookup, however it may (rarely) return a
1364 * false-negative result due to unrelated rename activity.
1365 *
1366 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1367 * however it must be used carefully, eg. with a following d_lookup in
1368 * the case of failure.
1369 *
1370 * __d_lookup callers must be commented.
1371 */
1375struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1372struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1376{ 1373{
1377 unsigned int len = name->len; 1374 unsigned int len = name->len;
@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1382 struct hlist_node *node; 1379 struct hlist_node *node;
1383 struct dentry *dentry; 1380 struct dentry *dentry;
1384 1381
1382 /*
1383 * The hash list is protected using RCU.
1384 *
1385 * Take d_lock when comparing a candidate dentry, to avoid races
1386 * with d_move().
1387 *
1388 * It is possible that concurrent renames can mess up our list
1389 * walk here and result in missing our dentry, resulting in the
1390 * false-negative result. d_lookup() protects against concurrent
1391 * renames using rename_lock seqlock.
1392 *
1393 * See Documentation/vfs/dcache-locking.txt for more details.
1394 */
1385 rcu_read_lock(); 1395 rcu_read_lock();
1386 1396
1387 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1397 hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1396 1406
1397 /* 1407 /*
1398 * Recheck the dentry after taking the lock - d_move may have 1408 * Recheck the dentry after taking the lock - d_move may have
1399 * changed things. Don't bother checking the hash because we're 1409 * changed things. Don't bother checking the hash because
1400 * about to compare the whole name anyway. 1410 * we're about to compare the whole name anyway.
1401 */ 1411 */
1402 if (dentry->d_parent != parent) 1412 if (dentry->d_parent != parent)
1403 goto next; 1413 goto next;
@@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root,
1925 bool slash = false; 1935 bool slash = false;
1926 int error = 0; 1936 int error = 0;
1927 1937
1928 spin_lock(&vfsmount_lock); 1938 br_read_lock(vfsmount_lock);
1929 while (dentry != root->dentry || vfsmnt != root->mnt) { 1939 while (dentry != root->dentry || vfsmnt != root->mnt) {
1930 struct dentry * parent; 1940 struct dentry * parent;
1931 1941
@@ -1954,7 +1964,7 @@ out:
1954 if (!error && !slash) 1964 if (!error && !slash)
1955 error = prepend(buffer, buflen, "/", 1); 1965 error = prepend(buffer, buflen, "/", 1);
1956 1966
1957 spin_unlock(&vfsmount_lock); 1967 br_read_unlock(vfsmount_lock);
1958 return error; 1968 return error;
1959 1969
1960global_root: 1970global_root:
@@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2)
2292 struct vfsmount *mnt = path1->mnt; 2302 struct vfsmount *mnt = path1->mnt;
2293 struct dentry *dentry = path1->dentry; 2303 struct dentry *dentry = path1->dentry;
2294 int res; 2304 int res;
2295 spin_lock(&vfsmount_lock); 2305
2306 br_read_lock(vfsmount_lock);
2296 if (mnt != path2->mnt) { 2307 if (mnt != path2->mnt) {
2297 for (;;) { 2308 for (;;) {
2298 if (mnt->mnt_parent == mnt) { 2309 if (mnt->mnt_parent == mnt) {
2299 spin_unlock(&vfsmount_lock); 2310 br_read_unlock(vfsmount_lock);
2300 return 0; 2311 return 0;
2301 } 2312 }
2302 if (mnt->mnt_parent == path2->mnt) 2313 if (mnt->mnt_parent == path2->mnt)
@@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2)
2306 dentry = mnt->mnt_mountpoint; 2317 dentry = mnt->mnt_mountpoint;
2307 } 2318 }
2308 res = is_subdir(dentry, path2->dentry); 2319 res = is_subdir(dentry, path2->dentry);
2309 spin_unlock(&vfsmount_lock); 2320 br_read_unlock(vfsmount_lock);
2310 return res; 2321 return res;
2311} 2322}
2312EXPORT_SYMBOL(path_is_under); 2323EXPORT_SYMBOL(path_is_under);
diff --git a/fs/exec.c b/fs/exec.c
index 05c7d6b84df7..2d9455282744 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1118,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1118 bprm->unsafe = tracehook_unsafe_exec(p); 1118 bprm->unsafe = tracehook_unsafe_exec(p);
1119 1119
1120 n_fs = 1; 1120 n_fs = 1;
1121 write_lock(&p->fs->lock); 1121 spin_lock(&p->fs->lock);
1122 rcu_read_lock(); 1122 rcu_read_lock();
1123 for (t = next_thread(p); t != p; t = next_thread(t)) { 1123 for (t = next_thread(p); t != p; t = next_thread(t)) {
1124 if (t->fs == p->fs) 1124 if (t->fs == p->fs)
@@ -1135,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1135 res = 1; 1135 res = 1;
1136 } 1136 }
1137 } 1137 }
1138 write_unlock(&p->fs->lock); 1138 spin_unlock(&p->fs->lock);
1139 1139
1140 return res; 1140 return res;
1141} 1141}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 1fa23f6ffba5..1736f2356388 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
250{ 250{
251 int i, err = 0; 251 int i, err = 0;
252 252
253 ll_rw_block(SWRITE, nr_bhs, bhs); 253 for (i = 0; i < nr_bhs; i++)
254 write_dirty_buffer(bhs[i], WRITE);
255
254 for (i = 0; i < nr_bhs; i++) { 256 for (i = 0; i < nr_bhs; i++) {
255 wait_on_buffer(bhs[i]); 257 wait_on_buffer(bhs[i]);
256 if (buffer_eopnotsupp(bhs[i])) { 258 if (buffer_eopnotsupp(bhs[i])) {
diff --git a/fs/file_table.c b/fs/file_table.c
index edecd36fed9b..a04bdd81c11c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
20#include <linux/cdev.h> 20#include <linux/cdev.h>
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/lglock.h>
23#include <linux/percpu_counter.h> 24#include <linux/percpu_counter.h>
25#include <linux/percpu.h>
24#include <linux/ima.h> 26#include <linux/ima.h>
25 27
26#include <asm/atomic.h> 28#include <asm/atomic.h>
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = {
32 .max_files = NR_FILE 34 .max_files = NR_FILE
33}; 35};
34 36
35/* public. Not pretty! */ 37DECLARE_LGLOCK(files_lglock);
36__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); 38DEFINE_LGLOCK(files_lglock);
37 39
38/* SLAB cache for file structures */ 40/* SLAB cache for file structures */
39static struct kmem_cache *filp_cachep __read_mostly; 41static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +251,7 @@ static void __fput(struct file *file)
249 cdev_put(inode->i_cdev); 251 cdev_put(inode->i_cdev);
250 fops_put(file->f_op); 252 fops_put(file->f_op);
251 put_pid(file->f_owner.pid); 253 put_pid(file->f_owner.pid);
252 file_kill(file); 254 file_sb_list_del(file);
253 if (file->f_mode & FMODE_WRITE) 255 if (file->f_mode & FMODE_WRITE)
254 drop_file_write_access(file); 256 drop_file_write_access(file);
255 file->f_path.dentry = NULL; 257 file->f_path.dentry = NULL;
@@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
328 return file; 330 return file;
329} 331}
330 332
331
332void put_filp(struct file *file) 333void put_filp(struct file *file)
333{ 334{
334 if (atomic_long_dec_and_test(&file->f_count)) { 335 if (atomic_long_dec_and_test(&file->f_count)) {
335 security_file_free(file); 336 security_file_free(file);
336 file_kill(file); 337 file_sb_list_del(file);
337 file_free(file); 338 file_free(file);
338 } 339 }
339} 340}
340 341
341void file_move(struct file *file, struct list_head *list) 342static inline int file_list_cpu(struct file *file)
342{ 343{
343 if (!list) 344#ifdef CONFIG_SMP
344 return; 345 return file->f_sb_list_cpu;
345 file_list_lock(); 346#else
346 list_move(&file->f_u.fu_list, list); 347 return smp_processor_id();
347 file_list_unlock(); 348#endif
349}
350
351/* helper for file_sb_list_add to reduce ifdefs */
352static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
353{
354 struct list_head *list;
355#ifdef CONFIG_SMP
356 int cpu;
357 cpu = smp_processor_id();
358 file->f_sb_list_cpu = cpu;
359 list = per_cpu_ptr(sb->s_files, cpu);
360#else
361 list = &sb->s_files;
362#endif
363 list_add(&file->f_u.fu_list, list);
348} 364}
349 365
350void file_kill(struct file *file) 366/**
367 * file_sb_list_add - add a file to the sb's file list
368 * @file: file to add
369 * @sb: sb to add it to
370 *
371 * Use this function to associate a file with the superblock of the inode it
372 * refers to.
373 */
374void file_sb_list_add(struct file *file, struct super_block *sb)
375{
376 lg_local_lock(files_lglock);
377 __file_sb_list_add(file, sb);
378 lg_local_unlock(files_lglock);
379}
380
381/**
382 * file_sb_list_del - remove a file from the sb's file list
383 * @file: file to remove
384 * @sb: sb to remove it from
385 *
386 * Use this function to remove a file from its superblock.
387 */
388void file_sb_list_del(struct file *file)
351{ 389{
352 if (!list_empty(&file->f_u.fu_list)) { 390 if (!list_empty(&file->f_u.fu_list)) {
353 file_list_lock(); 391 lg_local_lock_cpu(files_lglock, file_list_cpu(file));
354 list_del_init(&file->f_u.fu_list); 392 list_del_init(&file->f_u.fu_list);
355 file_list_unlock(); 393 lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
356 } 394 }
357} 395}
358 396
397#ifdef CONFIG_SMP
398
399/*
400 * These macros iterate all files on all CPUs for a given superblock.
401 * files_lglock must be held globally.
402 */
403#define do_file_list_for_each_entry(__sb, __file) \
404{ \
405 int i; \
406 for_each_possible_cpu(i) { \
407 struct list_head *list; \
408 list = per_cpu_ptr((__sb)->s_files, i); \
409 list_for_each_entry((__file), list, f_u.fu_list)
410
411#define while_file_list_for_each_entry \
412 } \
413}
414
415#else
416
417#define do_file_list_for_each_entry(__sb, __file) \
418{ \
419 struct list_head *list; \
420 list = &(sb)->s_files; \
421 list_for_each_entry((__file), list, f_u.fu_list)
422
423#define while_file_list_for_each_entry \
424}
425
426#endif
427
359int fs_may_remount_ro(struct super_block *sb) 428int fs_may_remount_ro(struct super_block *sb)
360{ 429{
361 struct file *file; 430 struct file *file;
362
363 /* Check that no files are currently opened for writing. */ 431 /* Check that no files are currently opened for writing. */
364 file_list_lock(); 432 lg_global_lock(files_lglock);
365 list_for_each_entry(file, &sb->s_files, f_u.fu_list) { 433 do_file_list_for_each_entry(sb, file) {
366 struct inode *inode = file->f_path.dentry->d_inode; 434 struct inode *inode = file->f_path.dentry->d_inode;
367 435
368 /* File with pending delete? */ 436 /* File with pending delete? */
@@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb)
372 /* Writeable file? */ 440 /* Writeable file? */
373 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) 441 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
374 goto too_bad; 442 goto too_bad;
375 } 443 } while_file_list_for_each_entry;
376 file_list_unlock(); 444 lg_global_unlock(files_lglock);
377 return 1; /* Tis' cool bro. */ 445 return 1; /* Tis' cool bro. */
378too_bad: 446too_bad:
379 file_list_unlock(); 447 lg_global_unlock(files_lglock);
380 return 0; 448 return 0;
381} 449}
382 450
@@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb)
392 struct file *f; 460 struct file *f;
393 461
394retry: 462retry:
395 file_list_lock(); 463 lg_global_lock(files_lglock);
396 list_for_each_entry(f, &sb->s_files, f_u.fu_list) { 464 do_file_list_for_each_entry(sb, f) {
397 struct vfsmount *mnt; 465 struct vfsmount *mnt;
398 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) 466 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
399 continue; 467 continue;
@@ -408,16 +476,13 @@ retry:
408 continue; 476 continue;
409 file_release_write(f); 477 file_release_write(f);
410 mnt = mntget(f->f_path.mnt); 478 mnt = mntget(f->f_path.mnt);
411 file_list_unlock(); 479 /* This can sleep, so we can't hold the spinlock. */
412 /* 480 lg_global_unlock(files_lglock);
413 * This can sleep, so we can't hold
414 * the file_list_lock() spinlock.
415 */
416 mnt_drop_write(mnt); 481 mnt_drop_write(mnt);
417 mntput(mnt); 482 mntput(mnt);
418 goto retry; 483 goto retry;
419 } 484 } while_file_list_for_each_entry;
420 file_list_unlock(); 485 lg_global_unlock(files_lglock);
421} 486}
422 487
423void __init files_init(unsigned long mempages) 488void __init files_init(unsigned long mempages)
@@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages)
437 if (files_stat.max_files < NR_FILE) 502 if (files_stat.max_files < NR_FILE)
438 files_stat.max_files = NR_FILE; 503 files_stat.max_files = NR_FILE;
439 files_defer_init(); 504 files_defer_init();
505 lg_lock_init(files_lglock);
440 percpu_counter_init(&nr_files, 0); 506 percpu_counter_init(&nr_files, 0);
441} 507}
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 1ee40eb9a2c0..ed45a9cf5f3d 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
13{ 13{
14 struct path old_root; 14 struct path old_root;
15 15
16 write_lock(&fs->lock); 16 spin_lock(&fs->lock);
17 old_root = fs->root; 17 old_root = fs->root;
18 fs->root = *path; 18 fs->root = *path;
19 path_get(path); 19 path_get(path);
20 write_unlock(&fs->lock); 20 spin_unlock(&fs->lock);
21 if (old_root.dentry) 21 if (old_root.dentry)
22 path_put(&old_root); 22 path_put(&old_root);
23} 23}
@@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
30{ 30{
31 struct path old_pwd; 31 struct path old_pwd;
32 32
33 write_lock(&fs->lock); 33 spin_lock(&fs->lock);
34 old_pwd = fs->pwd; 34 old_pwd = fs->pwd;
35 fs->pwd = *path; 35 fs->pwd = *path;
36 path_get(path); 36 path_get(path);
37 write_unlock(&fs->lock); 37 spin_unlock(&fs->lock);
38 38
39 if (old_pwd.dentry) 39 if (old_pwd.dentry)
40 path_put(&old_pwd); 40 path_put(&old_pwd);
@@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
51 task_lock(p); 51 task_lock(p);
52 fs = p->fs; 52 fs = p->fs;
53 if (fs) { 53 if (fs) {
54 write_lock(&fs->lock); 54 spin_lock(&fs->lock);
55 if (fs->root.dentry == old_root->dentry 55 if (fs->root.dentry == old_root->dentry
56 && fs->root.mnt == old_root->mnt) { 56 && fs->root.mnt == old_root->mnt) {
57 path_get(new_root); 57 path_get(new_root);
@@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
64 fs->pwd = *new_root; 64 fs->pwd = *new_root;
65 count++; 65 count++;
66 } 66 }
67 write_unlock(&fs->lock); 67 spin_unlock(&fs->lock);
68 } 68 }
69 task_unlock(p); 69 task_unlock(p);
70 } while_each_thread(g, p); 70 } while_each_thread(g, p);
@@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk)
87 if (fs) { 87 if (fs) {
88 int kill; 88 int kill;
89 task_lock(tsk); 89 task_lock(tsk);
90 write_lock(&fs->lock); 90 spin_lock(&fs->lock);
91 tsk->fs = NULL; 91 tsk->fs = NULL;
92 kill = !--fs->users; 92 kill = !--fs->users;
93 write_unlock(&fs->lock); 93 spin_unlock(&fs->lock);
94 task_unlock(tsk); 94 task_unlock(tsk);
95 if (kill) 95 if (kill)
96 free_fs_struct(fs); 96 free_fs_struct(fs);
@@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
104 if (fs) { 104 if (fs) {
105 fs->users = 1; 105 fs->users = 1;
106 fs->in_exec = 0; 106 fs->in_exec = 0;
107 rwlock_init(&fs->lock); 107 spin_lock_init(&fs->lock);
108 fs->umask = old->umask; 108 fs->umask = old->umask;
109 get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 109 get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
110 } 110 }
@@ -121,10 +121,10 @@ int unshare_fs_struct(void)
121 return -ENOMEM; 121 return -ENOMEM;
122 122
123 task_lock(current); 123 task_lock(current);
124 write_lock(&fs->lock); 124 spin_lock(&fs->lock);
125 kill = !--fs->users; 125 kill = !--fs->users;
126 current->fs = new_fs; 126 current->fs = new_fs;
127 write_unlock(&fs->lock); 127 spin_unlock(&fs->lock);
128 task_unlock(current); 128 task_unlock(current);
129 129
130 if (kill) 130 if (kill)
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask);
143/* to be mentioned only in INIT_TASK */ 143/* to be mentioned only in INIT_TASK */
144struct fs_struct init_fs = { 144struct fs_struct init_fs = {
145 .users = 1, 145 .users = 1,
146 .lock = __RW_LOCK_UNLOCKED(init_fs.lock), 146 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
147 .umask = 0022, 147 .umask = 0022,
148}; 148};
149 149
@@ -156,14 +156,14 @@ void daemonize_fs_struct(void)
156 156
157 task_lock(current); 157 task_lock(current);
158 158
159 write_lock(&init_fs.lock); 159 spin_lock(&init_fs.lock);
160 init_fs.users++; 160 init_fs.users++;
161 write_unlock(&init_fs.lock); 161 spin_unlock(&init_fs.lock);
162 162
163 write_lock(&fs->lock); 163 spin_lock(&fs->lock);
164 current->fs = &init_fs; 164 current->fs = &init_fs;
165 kill = !--fs->users; 165 kill = !--fs->users;
166 write_unlock(&fs->lock); 166 spin_unlock(&fs->lock);
167 167
168 task_unlock(current); 168 task_unlock(current);
169 if (kill) 169 if (kill)
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 99800e564157..6bc9e3a5a693 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
94 if (error < 0) 94 if (error < 0)
95 goto failed; 95 goto failed;
96 inode->i_mode = mode; 96 inode->i_mode = mode;
97 inode->i_ctime = CURRENT_TIME;
97 if (error == 0) { 98 if (error == 0) {
98 posix_acl_release(acl); 99 posix_acl_release(acl);
99 acl = NULL; 100 acl = NULL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index dd1e55535a4e..f7dc9b5f9ef8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name)
104 __putname(name); 104 __putname(name);
105 return NULL; 105 return NULL;
106 } 106 }
107 strncpy(name, root, PATH_MAX); 107 strlcpy(name, root, PATH_MAX);
108 if (len > p - name) { 108 if (len > p - name) {
109 __putname(name); 109 __putname(name);
110 return NULL; 110 return NULL;
@@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
876 char *path = dentry_name(dentry); 876 char *path = dentry_name(dentry);
877 int err = -ENOMEM; 877 int err = -ENOMEM;
878 if (path) { 878 if (path) {
879 int err = hostfs_do_readlink(path, link, PATH_MAX); 879 err = hostfs_do_readlink(path, link, PATH_MAX);
880 if (err == PATH_MAX) 880 if (err == PATH_MAX)
881 err = -E2BIG; 881 err = -E2BIG;
882 __putname(path); 882 __putname(path);
diff --git a/fs/internal.h b/fs/internal.h
index 6b706bc60a66..a6910e91cee8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,6 +9,8 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/lglock.h>
13
12struct super_block; 14struct super_block;
13struct linux_binprm; 15struct linux_binprm;
14struct path; 16struct path;
@@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
70 72
71extern void __init mnt_init(void); 73extern void __init mnt_init(void);
72 74
73extern spinlock_t vfsmount_lock; 75DECLARE_BRLOCK(vfsmount_lock);
76
74 77
75/* 78/*
76 * fs_struct.c 79 * fs_struct.c
@@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
80/* 83/*
81 * file_table.c 84 * file_table.c
82 */ 85 */
86extern void file_sb_list_add(struct file *f, struct super_block *sb);
87extern void file_sb_list_del(struct file *f);
83extern void mark_files_ro(struct super_block *); 88extern void mark_files_ro(struct super_block *);
84extern struct file *get_empty_filp(void); 89extern struct file *get_empty_filp(void);
85 90
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index b0435dd0654d..05a38b9c4c0e 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
254{ 254{
255 int i; 255 int i;
256 256
257 ll_rw_block(SWRITE, *batch_count, bhs); 257 for (i = 0; i < *batch_count; i++)
258 write_dirty_buffer(bhs[i], WRITE);
259
258 for (i = 0; i < *batch_count; i++) { 260 for (i = 0; i < *batch_count; i++) {
259 struct buffer_head *bh = bhs[i]; 261 struct buffer_head *bh = bhs[i];
260 clear_buffer_jwrite(bh); 262 clear_buffer_jwrite(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 28a9ddaa0c49..95d8c11c929e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal,
119 struct buffer_head *bh; 119 struct buffer_head *bh;
120 journal_header_t *header; 120 journal_header_t *header;
121 int ret; 121 int ret;
122 int barrier_done = 0;
123 122
124 if (is_journal_aborted(journal)) 123 if (is_journal_aborted(journal))
125 return 0; 124 return 0;
@@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal,
137 136
138 JBUFFER_TRACE(descriptor, "write commit block"); 137 JBUFFER_TRACE(descriptor, "write commit block");
139 set_buffer_dirty(bh); 138 set_buffer_dirty(bh);
139
140 if (journal->j_flags & JFS_BARRIER) { 140 if (journal->j_flags & JFS_BARRIER) {
141 set_buffer_ordered(bh); 141 ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
142 barrier_done = 1;
143 }
144 ret = sync_dirty_buffer(bh);
145 if (barrier_done)
146 clear_buffer_ordered(bh);
147 /* is it possible for another commit to fail at roughly
148 * the same time as this one? If so, we don't want to
149 * trust the barrier flag in the super, but instead want
150 * to remember if we sent a barrier request
151 */
152 if (ret == -EOPNOTSUPP && barrier_done) {
153 char b[BDEVNAME_SIZE];
154 142
155 printk(KERN_WARNING 143 /*
156 "JBD: barrier-based sync failed on %s - " 144 * Is it possible for another commit to fail at roughly
157 "disabling barriers\n", 145 * the same time as this one? If so, we don't want to
158 bdevname(journal->j_dev, b)); 146 * trust the barrier flag in the super, but instead want
159 spin_lock(&journal->j_state_lock); 147 * to remember if we sent a barrier request
160 journal->j_flags &= ~JFS_BARRIER; 148 */
161 spin_unlock(&journal->j_state_lock); 149 if (ret == -EOPNOTSUPP) {
150 char b[BDEVNAME_SIZE];
162 151
163 /* And try again, without the barrier */ 152 printk(KERN_WARNING
164 set_buffer_uptodate(bh); 153 "JBD: barrier-based sync failed on %s - "
165 set_buffer_dirty(bh); 154 "disabling barriers\n",
155 bdevname(journal->j_dev, b));
156 spin_lock(&journal->j_state_lock);
157 journal->j_flags &= ~JFS_BARRIER;
158 spin_unlock(&journal->j_state_lock);
159
160 /* And try again, without the barrier */
161 set_buffer_uptodate(bh);
162 set_buffer_dirty(bh);
163 ret = sync_dirty_buffer(bh);
164 }
165 } else {
166 ret = sync_dirty_buffer(bh); 166 ret = sync_dirty_buffer(bh);
167 } 167 }
168
168 put_bh(bh); /* One for getblk() */ 169 put_bh(bh); /* One for getblk() */
169 journal_put_journal_head(descriptor); 170 journal_put_journal_head(descriptor);
170 171
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f19ce94693d8..2c4b1f109da9 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait)
1024 if (wait) 1024 if (wait)
1025 sync_dirty_buffer(bh); 1025 sync_dirty_buffer(bh);
1026 else 1026 else
1027 ll_rw_block(SWRITE, 1, &bh); 1027 write_dirty_buffer(bh, WRITE);
1028 1028
1029out: 1029out:
1030 /* If we have just flushed the log (by marking s_start==0), then 1030 /* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad717328343a..d29018307e2e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
617 set_buffer_jwrite(bh); 617 set_buffer_jwrite(bh);
618 BUFFER_TRACE(bh, "write"); 618 BUFFER_TRACE(bh, "write");
619 set_buffer_dirty(bh); 619 set_buffer_dirty(bh);
620 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); 620 write_dirty_buffer(bh, write_op);
621} 621}
622#endif 622#endif
623 623
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c23a0f4e8a3..5247e7ffdcb4 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count)
255{ 255{
256 int i; 256 int i;
257 257
258 ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); 258 for (i = 0; i < *batch_count; i++)
259 write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
260
259 for (i = 0; i < *batch_count; i++) { 261 for (i = 0; i < *batch_count; i++) {
260 struct buffer_head *bh = journal->j_chkpt_bhs[i]; 262 struct buffer_head *bh = journal->j_chkpt_bhs[i];
261 clear_buffer_jwrite(bh); 263 clear_buffer_jwrite(bh);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f52e5e8049f1..7c068c189d80 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal,
101 struct commit_header *tmp; 101 struct commit_header *tmp;
102 struct buffer_head *bh; 102 struct buffer_head *bh;
103 int ret; 103 int ret;
104 int barrier_done = 0;
105 struct timespec now = current_kernel_time(); 104 struct timespec now = current_kernel_time();
106 105
107 if (is_journal_aborted(journal)) 106 if (is_journal_aborted(journal))
@@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal,
136 if (journal->j_flags & JBD2_BARRIER && 135 if (journal->j_flags & JBD2_BARRIER &&
137 !JBD2_HAS_INCOMPAT_FEATURE(journal, 136 !JBD2_HAS_INCOMPAT_FEATURE(journal,
138 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 137 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
139 set_buffer_ordered(bh); 138 ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
140 barrier_done = 1; 139 if (ret == -EOPNOTSUPP) {
141 } 140 printk(KERN_WARNING
142 ret = submit_bh(WRITE_SYNC_PLUG, bh); 141 "JBD2: Disabling barriers on %s, "
143 if (barrier_done) 142 "not supported by device\n", journal->j_devname);
144 clear_buffer_ordered(bh); 143 write_lock(&journal->j_state_lock);
145 144 journal->j_flags &= ~JBD2_BARRIER;
146 /* is it possible for another commit to fail at roughly 145 write_unlock(&journal->j_state_lock);
147 * the same time as this one? If so, we don't want to
148 * trust the barrier flag in the super, but instead want
149 * to remember if we sent a barrier request
150 */
151 if (ret == -EOPNOTSUPP && barrier_done) {
152 printk(KERN_WARNING
153 "JBD2: Disabling barriers on %s, "
154 "not supported by device\n", journal->j_devname);
155 write_lock(&journal->j_state_lock);
156 journal->j_flags &= ~JBD2_BARRIER;
157 write_unlock(&journal->j_state_lock);
158 146
159 /* And try again, without the barrier */ 147 /* And try again, without the barrier */
160 lock_buffer(bh); 148 lock_buffer(bh);
161 set_buffer_uptodate(bh); 149 set_buffer_uptodate(bh);
162 clear_buffer_dirty(bh); 150 clear_buffer_dirty(bh);
151 ret = submit_bh(WRITE_SYNC_PLUG, bh);
152 }
153 } else {
163 ret = submit_bh(WRITE_SYNC_PLUG, bh); 154 ret = submit_bh(WRITE_SYNC_PLUG, bh);
164 } 155 }
165 *cbh = bh; 156 *cbh = bh;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ad5866aaf0f9..0e8014ea6b94 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1124 set_buffer_uptodate(bh); 1124 set_buffer_uptodate(bh);
1125 } 1125 }
1126 } else 1126 } else
1127 ll_rw_block(SWRITE, 1, &bh); 1127 write_dirty_buffer(bh, WRITE);
1128 1128
1129out: 1129out:
1130 /* If we have just flushed the log (by marking s_start==0), then 1130 /* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index a360b06af2e3..9ad321fd63fd 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
625 set_buffer_jwrite(bh); 625 set_buffer_jwrite(bh);
626 BUFFER_TRACE(bh, "write"); 626 BUFFER_TRACE(bh, "write");
627 set_buffer_dirty(bh); 627 set_buffer_dirty(bh);
628 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); 628 write_dirty_buffer(bh, write_op);
629} 629}
630#endif 630#endif
631 631
diff --git a/fs/mbcache.c b/fs/mbcache.c
index cf4e6cdfd15b..93444747237b 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -80,6 +80,7 @@ struct mb_cache {
80 struct list_head c_cache_list; 80 struct list_head c_cache_list;
81 const char *c_name; 81 const char *c_name;
82 atomic_t c_entry_count; 82 atomic_t c_entry_count;
83 int c_max_entries;
83 int c_bucket_bits; 84 int c_bucket_bits;
84 struct kmem_cache *c_entry_cache; 85 struct kmem_cache *c_entry_cache;
85 struct list_head *c_block_hash; 86 struct list_head *c_block_hash;
@@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits)
243 if (!cache->c_entry_cache) 244 if (!cache->c_entry_cache)
244 goto fail2; 245 goto fail2;
245 246
247 /*
248 * Set an upper limit on the number of cache entries so that the hash
249 * chains won't grow too long.
250 */
251 cache->c_max_entries = bucket_count << 4;
252
246 spin_lock(&mb_cache_spinlock); 253 spin_lock(&mb_cache_spinlock);
247 list_add(&cache->c_cache_list, &mb_cache_list); 254 list_add(&cache->c_cache_list, &mb_cache_list);
248 spin_unlock(&mb_cache_spinlock); 255 spin_unlock(&mb_cache_spinlock);
@@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache)
333 kfree(cache); 340 kfree(cache);
334} 341}
335 342
336
337/* 343/*
338 * mb_cache_entry_alloc() 344 * mb_cache_entry_alloc()
339 * 345 *
@@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache)
345struct mb_cache_entry * 351struct mb_cache_entry *
346mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) 352mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
347{ 353{
348 struct mb_cache_entry *ce; 354 struct mb_cache_entry *ce = NULL;
349 355
350 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); 356 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
351 if (ce) { 357 spin_lock(&mb_cache_spinlock);
358 if (!list_empty(&mb_cache_lru_list)) {
359 ce = list_entry(mb_cache_lru_list.next,
360 struct mb_cache_entry, e_lru_list);
361 list_del_init(&ce->e_lru_list);
362 __mb_cache_entry_unhash(ce);
363 }
364 spin_unlock(&mb_cache_spinlock);
365 }
366 if (!ce) {
367 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
368 if (!ce)
369 return NULL;
352 atomic_inc(&cache->c_entry_count); 370 atomic_inc(&cache->c_entry_count);
353 INIT_LIST_HEAD(&ce->e_lru_list); 371 INIT_LIST_HEAD(&ce->e_lru_list);
354 INIT_LIST_HEAD(&ce->e_block_list); 372 INIT_LIST_HEAD(&ce->e_block_list);
355 ce->e_cache = cache; 373 ce->e_cache = cache;
356 ce->e_used = 1 + MB_CACHE_WRITER;
357 ce->e_queued = 0; 374 ce->e_queued = 0;
358 } 375 }
376 ce->e_used = 1 + MB_CACHE_WRITER;
359 return ce; 377 return ce;
360} 378}
361 379
diff --git a/fs/namei.c b/fs/namei.c
index 17ea76bf2fbe..24896e833565 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -595,15 +595,16 @@ int follow_up(struct path *path)
595{ 595{
596 struct vfsmount *parent; 596 struct vfsmount *parent;
597 struct dentry *mountpoint; 597 struct dentry *mountpoint;
598 spin_lock(&vfsmount_lock); 598
599 br_read_lock(vfsmount_lock);
599 parent = path->mnt->mnt_parent; 600 parent = path->mnt->mnt_parent;
600 if (parent == path->mnt) { 601 if (parent == path->mnt) {
601 spin_unlock(&vfsmount_lock); 602 br_read_unlock(vfsmount_lock);
602 return 0; 603 return 0;
603 } 604 }
604 mntget(parent); 605 mntget(parent);
605 mountpoint = dget(path->mnt->mnt_mountpoint); 606 mountpoint = dget(path->mnt->mnt_mountpoint);
606 spin_unlock(&vfsmount_lock); 607 br_read_unlock(vfsmount_lock);
607 dput(path->dentry); 608 dput(path->dentry);
608 path->dentry = mountpoint; 609 path->dentry = mountpoint;
609 mntput(path->mnt); 610 mntput(path->mnt);
@@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
686} 687}
687 688
688/* 689/*
690 * Allocate a dentry with name and parent, and perform a parent
691 * directory ->lookup on it. Returns the new dentry, or ERR_PTR
692 * on error. parent->d_inode->i_mutex must be held. d_lookup must
693 * have verified that no child exists while under i_mutex.
694 */
695static struct dentry *d_alloc_and_lookup(struct dentry *parent,
696 struct qstr *name, struct nameidata *nd)
697{
698 struct inode *inode = parent->d_inode;
699 struct dentry *dentry;
700 struct dentry *old;
701
702 /* Don't create child dentry for a dead directory. */
703 if (unlikely(IS_DEADDIR(inode)))
704 return ERR_PTR(-ENOENT);
705
706 dentry = d_alloc(parent, name);
707 if (unlikely(!dentry))
708 return ERR_PTR(-ENOMEM);
709
710 old = inode->i_op->lookup(inode, dentry, nd);
711 if (unlikely(old)) {
712 dput(dentry);
713 dentry = old;
714 }
715 return dentry;
716}
717
718/*
689 * It's more convoluted than I'd like it to be, but... it's still fairly 719 * It's more convoluted than I'd like it to be, but... it's still fairly
690 * small and for now I'd prefer to have fast path as straight as possible. 720 * small and for now I'd prefer to have fast path as straight as possible.
691 * It _is_ time-critical. 721 * It _is_ time-critical.
@@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
706 return err; 736 return err;
707 } 737 }
708 738
739 /*
740 * Rename seqlock is not required here because in the off chance
741 * of a false negative due to a concurrent rename, we're going to
742 * do the non-racy lookup, below.
743 */
709 dentry = __d_lookup(nd->path.dentry, name); 744 dentry = __d_lookup(nd->path.dentry, name);
710 if (!dentry) 745 if (!dentry)
711 goto need_lookup; 746 goto need_lookup;
747found:
712 if (dentry->d_op && dentry->d_op->d_revalidate) 748 if (dentry->d_op && dentry->d_op->d_revalidate)
713 goto need_revalidate; 749 goto need_revalidate;
714done: 750done:
@@ -724,56 +760,28 @@ need_lookup:
724 mutex_lock(&dir->i_mutex); 760 mutex_lock(&dir->i_mutex);
725 /* 761 /*
726 * First re-do the cached lookup just in case it was created 762 * First re-do the cached lookup just in case it was created
727 * while we waited for the directory semaphore.. 763 * while we waited for the directory semaphore, or the first
764 * lookup failed due to an unrelated rename.
728 * 765 *
729 * FIXME! This could use version numbering or similar to 766 * This could use version numbering or similar to avoid unnecessary
730 * avoid unnecessary cache lookups. 767 * cache lookups, but then we'd have to do the first lookup in the
731 * 768 * non-racy way. However in the common case here, everything should
732 * The "dcache_lock" is purely to protect the RCU list walker 769 * be hot in cache, so would it be a big win?
733 * from concurrent renames at this point (we mustn't get false
734 * negatives from the RCU list walk here, unlike the optimistic
735 * fast walk).
736 *
737 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
738 */ 770 */
739 dentry = d_lookup(parent, name); 771 dentry = d_lookup(parent, name);
740 if (!dentry) { 772 if (likely(!dentry)) {
741 struct dentry *new; 773 dentry = d_alloc_and_lookup(parent, name, nd);
742
743 /* Don't create child dentry for a dead directory. */
744 dentry = ERR_PTR(-ENOENT);
745 if (IS_DEADDIR(dir))
746 goto out_unlock;
747
748 new = d_alloc(parent, name);
749 dentry = ERR_PTR(-ENOMEM);
750 if (new) {
751 dentry = dir->i_op->lookup(dir, new, nd);
752 if (dentry)
753 dput(new);
754 else
755 dentry = new;
756 }
757out_unlock:
758 mutex_unlock(&dir->i_mutex); 774 mutex_unlock(&dir->i_mutex);
759 if (IS_ERR(dentry)) 775 if (IS_ERR(dentry))
760 goto fail; 776 goto fail;
761 goto done; 777 goto done;
762 } 778 }
763
764 /* 779 /*
765 * Uhhuh! Nasty case: the cache was re-populated while 780 * Uhhuh! Nasty case: the cache was re-populated while
766 * we waited on the semaphore. Need to revalidate. 781 * we waited on the semaphore. Need to revalidate.
767 */ 782 */
768 mutex_unlock(&dir->i_mutex); 783 mutex_unlock(&dir->i_mutex);
769 if (dentry->d_op && dentry->d_op->d_revalidate) { 784 goto found;
770 dentry = do_revalidate(dentry, nd);
771 if (!dentry)
772 dentry = ERR_PTR(-ENOENT);
773 }
774 if (IS_ERR(dentry))
775 goto fail;
776 goto done;
777 785
778need_revalidate: 786need_revalidate:
779 dentry = do_revalidate(dentry, nd); 787 dentry = do_revalidate(dentry, nd);
@@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name,
1130 goto out; 1138 goto out;
1131 } 1139 }
1132 1140
1133 dentry = __d_lookup(base, name); 1141 /*
1134 1142 * Don't bother with __d_lookup: callers are for creat as
1135 /* lockess __d_lookup may fail due to concurrent d_move() 1143 * well as unlink, so a lot of the time it would cost
1136 * in some unrelated directory, so try with d_lookup 1144 * a double lookup.
1137 */ 1145 */
1138 if (!dentry) 1146 dentry = d_lookup(base, name);
1139 dentry = d_lookup(base, name);
1140 1147
1141 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 1148 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
1142 dentry = do_revalidate(dentry, nd); 1149 dentry = do_revalidate(dentry, nd);
1143 1150
1144 if (!dentry) { 1151 if (!dentry)
1145 struct dentry *new; 1152 dentry = d_alloc_and_lookup(base, name, nd);
1146
1147 /* Don't create child dentry for a dead directory. */
1148 dentry = ERR_PTR(-ENOENT);
1149 if (IS_DEADDIR(inode))
1150 goto out;
1151
1152 new = d_alloc(base, name);
1153 dentry = ERR_PTR(-ENOMEM);
1154 if (!new)
1155 goto out;
1156 dentry = inode->i_op->lookup(inode, new, nd);
1157 if (!dentry)
1158 dentry = new;
1159 else
1160 dput(new);
1161 }
1162out: 1153out:
1163 return dentry; 1154 return dentry;
1164} 1155}
diff --git a/fs/namespace.c b/fs/namespace.c
index 2e10cb19c5b0..de402eb6eafb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,6 +11,8 @@
11#include <linux/syscalls.h> 11#include <linux/syscalls.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/spinlock.h>
15#include <linux/percpu.h>
14#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
15#include <linux/init.h> 17#include <linux/init.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
@@ -38,12 +40,10 @@
38#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) 40#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
39#define HASH_SIZE (1UL << HASH_SHIFT) 41#define HASH_SIZE (1UL << HASH_SHIFT)
40 42
41/* spinlock for vfsmount related operations, inplace of dcache_lock */
42__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
43
44static int event; 43static int event;
45static DEFINE_IDA(mnt_id_ida); 44static DEFINE_IDA(mnt_id_ida);
46static DEFINE_IDA(mnt_group_ida); 45static DEFINE_IDA(mnt_group_ida);
46static DEFINE_SPINLOCK(mnt_id_lock);
47static int mnt_id_start = 0; 47static int mnt_id_start = 0;
48static int mnt_group_start = 1; 48static int mnt_group_start = 1;
49 49
@@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem;
55struct kobject *fs_kobj; 55struct kobject *fs_kobj;
56EXPORT_SYMBOL_GPL(fs_kobj); 56EXPORT_SYMBOL_GPL(fs_kobj);
57 57
58/*
59 * vfsmount lock may be taken for read to prevent changes to the
60 * vfsmount hash, ie. during mountpoint lookups or walking back
61 * up the tree.
62 *
63 * It should be taken for write in all cases where the vfsmount
64 * tree or hash is modified or when a vfsmount structure is modified.
65 */
66DEFINE_BRLOCK(vfsmount_lock);
67
58static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 68static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
59{ 69{
60 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); 70 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
65 75
66#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) 76#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
67 77
68/* allocation is serialized by namespace_sem */ 78/*
79 * allocation is serialized by namespace_sem, but we need the spinlock to
80 * serialize with freeing.
81 */
69static int mnt_alloc_id(struct vfsmount *mnt) 82static int mnt_alloc_id(struct vfsmount *mnt)
70{ 83{
71 int res; 84 int res;
72 85
73retry: 86retry:
74 ida_pre_get(&mnt_id_ida, GFP_KERNEL); 87 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
75 spin_lock(&vfsmount_lock); 88 spin_lock(&mnt_id_lock);
76 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); 89 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
77 if (!res) 90 if (!res)
78 mnt_id_start = mnt->mnt_id + 1; 91 mnt_id_start = mnt->mnt_id + 1;
79 spin_unlock(&vfsmount_lock); 92 spin_unlock(&mnt_id_lock);
80 if (res == -EAGAIN) 93 if (res == -EAGAIN)
81 goto retry; 94 goto retry;
82 95
@@ -86,11 +99,11 @@ retry:
86static void mnt_free_id(struct vfsmount *mnt) 99static void mnt_free_id(struct vfsmount *mnt)
87{ 100{
88 int id = mnt->mnt_id; 101 int id = mnt->mnt_id;
89 spin_lock(&vfsmount_lock); 102 spin_lock(&mnt_id_lock);
90 ida_remove(&mnt_id_ida, id); 103 ida_remove(&mnt_id_ida, id);
91 if (mnt_id_start > id) 104 if (mnt_id_start > id)
92 mnt_id_start = id; 105 mnt_id_start = id;
93 spin_unlock(&vfsmount_lock); 106 spin_unlock(&mnt_id_lock);
94} 107}
95 108
96/* 109/*
@@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
348{ 361{
349 int ret = 0; 362 int ret = 0;
350 363
351 spin_lock(&vfsmount_lock); 364 br_write_lock(vfsmount_lock);
352 mnt->mnt_flags |= MNT_WRITE_HOLD; 365 mnt->mnt_flags |= MNT_WRITE_HOLD;
353 /* 366 /*
354 * After storing MNT_WRITE_HOLD, we'll read the counters. This store 367 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
382 */ 395 */
383 smp_wmb(); 396 smp_wmb();
384 mnt->mnt_flags &= ~MNT_WRITE_HOLD; 397 mnt->mnt_flags &= ~MNT_WRITE_HOLD;
385 spin_unlock(&vfsmount_lock); 398 br_write_unlock(vfsmount_lock);
386 return ret; 399 return ret;
387} 400}
388 401
389static void __mnt_unmake_readonly(struct vfsmount *mnt) 402static void __mnt_unmake_readonly(struct vfsmount *mnt)
390{ 403{
391 spin_lock(&vfsmount_lock); 404 br_write_lock(vfsmount_lock);
392 mnt->mnt_flags &= ~MNT_READONLY; 405 mnt->mnt_flags &= ~MNT_READONLY;
393 spin_unlock(&vfsmount_lock); 406 br_write_unlock(vfsmount_lock);
394} 407}
395 408
396void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 409void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt)
414/* 427/*
415 * find the first or last mount at @dentry on vfsmount @mnt depending on 428 * find the first or last mount at @dentry on vfsmount @mnt depending on
416 * @dir. If @dir is set return the first mount else return the last mount. 429 * @dir. If @dir is set return the first mount else return the last mount.
430 * vfsmount_lock must be held for read or write.
417 */ 431 */
418struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, 432struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
419 int dir) 433 int dir)
@@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
443struct vfsmount *lookup_mnt(struct path *path) 457struct vfsmount *lookup_mnt(struct path *path)
444{ 458{
445 struct vfsmount *child_mnt; 459 struct vfsmount *child_mnt;
446 spin_lock(&vfsmount_lock); 460
461 br_read_lock(vfsmount_lock);
447 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) 462 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
448 mntget(child_mnt); 463 mntget(child_mnt);
449 spin_unlock(&vfsmount_lock); 464 br_read_unlock(vfsmount_lock);
450 return child_mnt; 465 return child_mnt;
451} 466}
452 467
@@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt)
455 return mnt->mnt_ns == current->nsproxy->mnt_ns; 470 return mnt->mnt_ns == current->nsproxy->mnt_ns;
456} 471}
457 472
473/*
474 * vfsmount lock must be held for write
475 */
458static void touch_mnt_namespace(struct mnt_namespace *ns) 476static void touch_mnt_namespace(struct mnt_namespace *ns)
459{ 477{
460 if (ns) { 478 if (ns) {
@@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns)
463 } 481 }
464} 482}
465 483
484/*
485 * vfsmount lock must be held for write
486 */
466static void __touch_mnt_namespace(struct mnt_namespace *ns) 487static void __touch_mnt_namespace(struct mnt_namespace *ns)
467{ 488{
468 if (ns && ns->event != event) { 489 if (ns && ns->event != event) {
@@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
471 } 492 }
472} 493}
473 494
495/*
496 * vfsmount lock must be held for write
497 */
474static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 498static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
475{ 499{
476 old_path->dentry = mnt->mnt_mountpoint; 500 old_path->dentry = mnt->mnt_mountpoint;
@@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
482 old_path->dentry->d_mounted--; 506 old_path->dentry->d_mounted--;
483} 507}
484 508
509/*
510 * vfsmount lock must be held for write
511 */
485void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 512void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
486 struct vfsmount *child_mnt) 513 struct vfsmount *child_mnt)
487{ 514{
@@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
490 dentry->d_mounted++; 517 dentry->d_mounted++;
491} 518}
492 519
520/*
521 * vfsmount lock must be held for write
522 */
493static void attach_mnt(struct vfsmount *mnt, struct path *path) 523static void attach_mnt(struct vfsmount *mnt, struct path *path)
494{ 524{
495 mnt_set_mountpoint(path->mnt, path->dentry, mnt); 525 mnt_set_mountpoint(path->mnt, path->dentry, mnt);
@@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
499} 529}
500 530
501/* 531/*
502 * the caller must hold vfsmount_lock 532 * vfsmount lock must be held for write
503 */ 533 */
504static void commit_tree(struct vfsmount *mnt) 534static void commit_tree(struct vfsmount *mnt)
505{ 535{
@@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt)
623void mntput_no_expire(struct vfsmount *mnt) 653void mntput_no_expire(struct vfsmount *mnt)
624{ 654{
625repeat: 655repeat:
626 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { 656 if (atomic_add_unless(&mnt->mnt_count, -1, 1))
627 if (likely(!mnt->mnt_pinned)) { 657 return;
628 spin_unlock(&vfsmount_lock); 658 br_write_lock(vfsmount_lock);
629 __mntput(mnt); 659 if (!atomic_dec_and_test(&mnt->mnt_count)) {
630 return; 660 br_write_unlock(vfsmount_lock);
631 } 661 return;
632 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); 662 }
633 mnt->mnt_pinned = 0; 663 if (likely(!mnt->mnt_pinned)) {
634 spin_unlock(&vfsmount_lock); 664 br_write_unlock(vfsmount_lock);
635 acct_auto_close_mnt(mnt); 665 __mntput(mnt);
636 goto repeat; 666 return;
637 } 667 }
668 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
669 mnt->mnt_pinned = 0;
670 br_write_unlock(vfsmount_lock);
671 acct_auto_close_mnt(mnt);
672 goto repeat;
638} 673}
639
640EXPORT_SYMBOL(mntput_no_expire); 674EXPORT_SYMBOL(mntput_no_expire);
641 675
642void mnt_pin(struct vfsmount *mnt) 676void mnt_pin(struct vfsmount *mnt)
643{ 677{
644 spin_lock(&vfsmount_lock); 678 br_write_lock(vfsmount_lock);
645 mnt->mnt_pinned++; 679 mnt->mnt_pinned++;
646 spin_unlock(&vfsmount_lock); 680 br_write_unlock(vfsmount_lock);
647} 681}
648 682
649EXPORT_SYMBOL(mnt_pin); 683EXPORT_SYMBOL(mnt_pin);
650 684
651void mnt_unpin(struct vfsmount *mnt) 685void mnt_unpin(struct vfsmount *mnt)
652{ 686{
653 spin_lock(&vfsmount_lock); 687 br_write_lock(vfsmount_lock);
654 if (mnt->mnt_pinned) { 688 if (mnt->mnt_pinned) {
655 atomic_inc(&mnt->mnt_count); 689 atomic_inc(&mnt->mnt_count);
656 mnt->mnt_pinned--; 690 mnt->mnt_pinned--;
657 } 691 }
658 spin_unlock(&vfsmount_lock); 692 br_write_unlock(vfsmount_lock);
659} 693}
660 694
661EXPORT_SYMBOL(mnt_unpin); 695EXPORT_SYMBOL(mnt_unpin);
@@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p)
746 struct mnt_namespace *ns = p->ns; 780 struct mnt_namespace *ns = p->ns;
747 int res = 0; 781 int res = 0;
748 782
749 spin_lock(&vfsmount_lock); 783 br_read_lock(vfsmount_lock);
750 if (p->event != ns->event) { 784 if (p->event != ns->event) {
751 p->event = ns->event; 785 p->event = ns->event;
752 res = 1; 786 res = 1;
753 } 787 }
754 spin_unlock(&vfsmount_lock); 788 br_read_unlock(vfsmount_lock);
755 789
756 return res; 790 return res;
757} 791}
@@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt)
952 int minimum_refs = 0; 986 int minimum_refs = 0;
953 struct vfsmount *p; 987 struct vfsmount *p;
954 988
955 spin_lock(&vfsmount_lock); 989 br_read_lock(vfsmount_lock);
956 for (p = mnt; p; p = next_mnt(p, mnt)) { 990 for (p = mnt; p; p = next_mnt(p, mnt)) {
957 actual_refs += atomic_read(&p->mnt_count); 991 actual_refs += atomic_read(&p->mnt_count);
958 minimum_refs += 2; 992 minimum_refs += 2;
959 } 993 }
960 spin_unlock(&vfsmount_lock); 994 br_read_unlock(vfsmount_lock);
961 995
962 if (actual_refs > minimum_refs) 996 if (actual_refs > minimum_refs)
963 return 0; 997 return 0;
@@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt)
984{ 1018{
985 int ret = 1; 1019 int ret = 1;
986 down_read(&namespace_sem); 1020 down_read(&namespace_sem);
987 spin_lock(&vfsmount_lock); 1021 br_read_lock(vfsmount_lock);
988 if (propagate_mount_busy(mnt, 2)) 1022 if (propagate_mount_busy(mnt, 2))
989 ret = 0; 1023 ret = 0;
990 spin_unlock(&vfsmount_lock); 1024 br_read_unlock(vfsmount_lock);
991 up_read(&namespace_sem); 1025 up_read(&namespace_sem);
992 return ret; 1026 return ret;
993} 1027}
@@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head)
1003 if (mnt->mnt_parent != mnt) { 1037 if (mnt->mnt_parent != mnt) {
1004 struct dentry *dentry; 1038 struct dentry *dentry;
1005 struct vfsmount *m; 1039 struct vfsmount *m;
1006 spin_lock(&vfsmount_lock); 1040
1041 br_write_lock(vfsmount_lock);
1007 dentry = mnt->mnt_mountpoint; 1042 dentry = mnt->mnt_mountpoint;
1008 m = mnt->mnt_parent; 1043 m = mnt->mnt_parent;
1009 mnt->mnt_mountpoint = mnt->mnt_root; 1044 mnt->mnt_mountpoint = mnt->mnt_root;
1010 mnt->mnt_parent = mnt; 1045 mnt->mnt_parent = mnt;
1011 m->mnt_ghosts--; 1046 m->mnt_ghosts--;
1012 spin_unlock(&vfsmount_lock); 1047 br_write_unlock(vfsmount_lock);
1013 dput(dentry); 1048 dput(dentry);
1014 mntput(m); 1049 mntput(m);
1015 } 1050 }
@@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head)
1017 } 1052 }
1018} 1053}
1019 1054
1055/*
1056 * vfsmount lock must be held for write
1057 * namespace_sem must be held for write
1058 */
1020void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) 1059void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1021{ 1060{
1022 struct vfsmount *p; 1061 struct vfsmount *p;
@@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1107 } 1146 }
1108 1147
1109 down_write(&namespace_sem); 1148 down_write(&namespace_sem);
1110 spin_lock(&vfsmount_lock); 1149 br_write_lock(vfsmount_lock);
1111 event++; 1150 event++;
1112 1151
1113 if (!(flags & MNT_DETACH)) 1152 if (!(flags & MNT_DETACH))
@@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1119 umount_tree(mnt, 1, &umount_list); 1158 umount_tree(mnt, 1, &umount_list);
1120 retval = 0; 1159 retval = 0;
1121 } 1160 }
1122 spin_unlock(&vfsmount_lock); 1161 br_write_unlock(vfsmount_lock);
1123 up_write(&namespace_sem); 1162 up_write(&namespace_sem);
1124 release_mounts(&umount_list); 1163 release_mounts(&umount_list);
1125 return retval; 1164 return retval;
@@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1231 q = clone_mnt(p, p->mnt_root, flag); 1270 q = clone_mnt(p, p->mnt_root, flag);
1232 if (!q) 1271 if (!q)
1233 goto Enomem; 1272 goto Enomem;
1234 spin_lock(&vfsmount_lock); 1273 br_write_lock(vfsmount_lock);
1235 list_add_tail(&q->mnt_list, &res->mnt_list); 1274 list_add_tail(&q->mnt_list, &res->mnt_list);
1236 attach_mnt(q, &path); 1275 attach_mnt(q, &path);
1237 spin_unlock(&vfsmount_lock); 1276 br_write_unlock(vfsmount_lock);
1238 } 1277 }
1239 } 1278 }
1240 return res; 1279 return res;
1241Enomem: 1280Enomem:
1242 if (res) { 1281 if (res) {
1243 LIST_HEAD(umount_list); 1282 LIST_HEAD(umount_list);
1244 spin_lock(&vfsmount_lock); 1283 br_write_lock(vfsmount_lock);
1245 umount_tree(res, 0, &umount_list); 1284 umount_tree(res, 0, &umount_list);
1246 spin_unlock(&vfsmount_lock); 1285 br_write_unlock(vfsmount_lock);
1247 release_mounts(&umount_list); 1286 release_mounts(&umount_list);
1248 } 1287 }
1249 return NULL; 1288 return NULL;
@@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
1262{ 1301{
1263 LIST_HEAD(umount_list); 1302 LIST_HEAD(umount_list);
1264 down_write(&namespace_sem); 1303 down_write(&namespace_sem);
1265 spin_lock(&vfsmount_lock); 1304 br_write_lock(vfsmount_lock);
1266 umount_tree(mnt, 0, &umount_list); 1305 umount_tree(mnt, 0, &umount_list);
1267 spin_unlock(&vfsmount_lock); 1306 br_write_unlock(vfsmount_lock);
1268 up_write(&namespace_sem); 1307 up_write(&namespace_sem);
1269 release_mounts(&umount_list); 1308 release_mounts(&umount_list);
1270} 1309}
@@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1392 if (err) 1431 if (err)
1393 goto out_cleanup_ids; 1432 goto out_cleanup_ids;
1394 1433
1395 spin_lock(&vfsmount_lock); 1434 br_write_lock(vfsmount_lock);
1396 1435
1397 if (IS_MNT_SHARED(dest_mnt)) { 1436 if (IS_MNT_SHARED(dest_mnt)) {
1398 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1437 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1411 list_del_init(&child->mnt_hash); 1450 list_del_init(&child->mnt_hash);
1412 commit_tree(child); 1451 commit_tree(child);
1413 } 1452 }
1414 spin_unlock(&vfsmount_lock); 1453 br_write_unlock(vfsmount_lock);
1454
1415 return 0; 1455 return 0;
1416 1456
1417 out_cleanup_ids: 1457 out_cleanup_ids:
@@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag)
1466 goto out_unlock; 1506 goto out_unlock;
1467 } 1507 }
1468 1508
1469 spin_lock(&vfsmount_lock); 1509 br_write_lock(vfsmount_lock);
1470 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) 1510 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1471 change_mnt_propagation(m, type); 1511 change_mnt_propagation(m, type);
1472 spin_unlock(&vfsmount_lock); 1512 br_write_unlock(vfsmount_lock);
1473 1513
1474 out_unlock: 1514 out_unlock:
1475 up_write(&namespace_sem); 1515 up_write(&namespace_sem);
@@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name,
1513 err = graft_tree(mnt, path); 1553 err = graft_tree(mnt, path);
1514 if (err) { 1554 if (err) {
1515 LIST_HEAD(umount_list); 1555 LIST_HEAD(umount_list);
1516 spin_lock(&vfsmount_lock); 1556
1557 br_write_lock(vfsmount_lock);
1517 umount_tree(mnt, 0, &umount_list); 1558 umount_tree(mnt, 0, &umount_list);
1518 spin_unlock(&vfsmount_lock); 1559 br_write_unlock(vfsmount_lock);
1519 release_mounts(&umount_list); 1560 release_mounts(&umount_list);
1520 } 1561 }
1521 1562
@@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1568 else 1609 else
1569 err = do_remount_sb(sb, flags, data, 0); 1610 err = do_remount_sb(sb, flags, data, 0);
1570 if (!err) { 1611 if (!err) {
1571 spin_lock(&vfsmount_lock); 1612 br_write_lock(vfsmount_lock);
1572 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; 1613 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
1573 path->mnt->mnt_flags = mnt_flags; 1614 path->mnt->mnt_flags = mnt_flags;
1574 spin_unlock(&vfsmount_lock); 1615 br_write_unlock(vfsmount_lock);
1575 } 1616 }
1576 up_write(&sb->s_umount); 1617 up_write(&sb->s_umount);
1577 if (!err) { 1618 if (!err) {
1578 spin_lock(&vfsmount_lock); 1619 br_write_lock(vfsmount_lock);
1579 touch_mnt_namespace(path->mnt->mnt_ns); 1620 touch_mnt_namespace(path->mnt->mnt_ns);
1580 spin_unlock(&vfsmount_lock); 1621 br_write_unlock(vfsmount_lock);
1581 } 1622 }
1582 return err; 1623 return err;
1583} 1624}
@@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1754 return; 1795 return;
1755 1796
1756 down_write(&namespace_sem); 1797 down_write(&namespace_sem);
1757 spin_lock(&vfsmount_lock); 1798 br_write_lock(vfsmount_lock);
1758 1799
1759 /* extract from the expiration list every vfsmount that matches the 1800 /* extract from the expiration list every vfsmount that matches the
1760 * following criteria: 1801 * following criteria:
@@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1773 touch_mnt_namespace(mnt->mnt_ns); 1814 touch_mnt_namespace(mnt->mnt_ns);
1774 umount_tree(mnt, 1, &umounts); 1815 umount_tree(mnt, 1, &umounts);
1775 } 1816 }
1776 spin_unlock(&vfsmount_lock); 1817 br_write_unlock(vfsmount_lock);
1777 up_write(&namespace_sem); 1818 up_write(&namespace_sem);
1778 1819
1779 release_mounts(&umounts); 1820 release_mounts(&umounts);
@@ -1830,6 +1871,8 @@ resume:
1830/* 1871/*
1831 * process a list of expirable mountpoints with the intent of discarding any 1872 * process a list of expirable mountpoints with the intent of discarding any
1832 * submounts of a specific parent mountpoint 1873 * submounts of a specific parent mountpoint
1874 *
1875 * vfsmount_lock must be held for write
1833 */ 1876 */
1834static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) 1877static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1835{ 1878{
@@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2048 kfree(new_ns); 2091 kfree(new_ns);
2049 return ERR_PTR(-ENOMEM); 2092 return ERR_PTR(-ENOMEM);
2050 } 2093 }
2051 spin_lock(&vfsmount_lock); 2094 br_write_lock(vfsmount_lock);
2052 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 2095 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
2053 spin_unlock(&vfsmount_lock); 2096 br_write_unlock(vfsmount_lock);
2054 2097
2055 /* 2098 /*
2056 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts 2099 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2244 goto out2; /* not attached */ 2287 goto out2; /* not attached */
2245 /* make sure we can reach put_old from new_root */ 2288 /* make sure we can reach put_old from new_root */
2246 tmp = old.mnt; 2289 tmp = old.mnt;
2247 spin_lock(&vfsmount_lock); 2290 br_write_lock(vfsmount_lock);
2248 if (tmp != new.mnt) { 2291 if (tmp != new.mnt) {
2249 for (;;) { 2292 for (;;) {
2250 if (tmp->mnt_parent == tmp) 2293 if (tmp->mnt_parent == tmp)
@@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2264 /* mount new_root on / */ 2307 /* mount new_root on / */
2265 attach_mnt(new.mnt, &root_parent); 2308 attach_mnt(new.mnt, &root_parent);
2266 touch_mnt_namespace(current->nsproxy->mnt_ns); 2309 touch_mnt_namespace(current->nsproxy->mnt_ns);
2267 spin_unlock(&vfsmount_lock); 2310 br_write_unlock(vfsmount_lock);
2268 chroot_fs_refs(&root, &new); 2311 chroot_fs_refs(&root, &new);
2269 error = 0; 2312 error = 0;
2270 path_put(&root_parent); 2313 path_put(&root_parent);
@@ -2279,7 +2322,7 @@ out1:
2279out0: 2322out0:
2280 return error; 2323 return error;
2281out3: 2324out3:
2282 spin_unlock(&vfsmount_lock); 2325 br_write_unlock(vfsmount_lock);
2283 goto out2; 2326 goto out2;
2284} 2327}
2285 2328
@@ -2326,6 +2369,8 @@ void __init mnt_init(void)
2326 for (u = 0; u < HASH_SIZE; u++) 2369 for (u = 0; u < HASH_SIZE; u++)
2327 INIT_LIST_HEAD(&mount_hashtable[u]); 2370 INIT_LIST_HEAD(&mount_hashtable[u]);
2328 2371
2372 br_lock_init(vfsmount_lock);
2373
2329 err = sysfs_init(); 2374 err = sysfs_init();
2330 if (err) 2375 if (err)
2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2376 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns)
2344 if (!atomic_dec_and_test(&ns->count)) 2389 if (!atomic_dec_and_test(&ns->count))
2345 return; 2390 return;
2346 down_write(&namespace_sem); 2391 down_write(&namespace_sem);
2347 spin_lock(&vfsmount_lock); 2392 br_write_lock(vfsmount_lock);
2348 umount_tree(ns->root, 0, &umount_list); 2393 umount_tree(ns->root, 0, &umount_list);
2349 spin_unlock(&vfsmount_lock); 2394 br_write_unlock(vfsmount_lock);
2350 up_write(&namespace_sem); 2395 up_write(&namespace_sem);
2351 release_mounts(&umount_list); 2396 release_mounts(&umount_list);
2352 kfree(ns); 2397 kfree(ns);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index bee60c04109a..922263393c76 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
175{ 175{
176 struct the_nilfs *nilfs = sbi->s_nilfs; 176 struct the_nilfs *nilfs = sbi->s_nilfs;
177 int err; 177 int err;
178 int barrier_done = 0;
179 178
180 if (nilfs_test_opt(sbi, BARRIER)) {
181 set_buffer_ordered(nilfs->ns_sbh[0]);
182 barrier_done = 1;
183 }
184 retry: 179 retry:
185 set_buffer_dirty(nilfs->ns_sbh[0]); 180 set_buffer_dirty(nilfs->ns_sbh[0]);
186 err = sync_dirty_buffer(nilfs->ns_sbh[0]); 181
187 if (err == -EOPNOTSUPP && barrier_done) { 182 if (nilfs_test_opt(sbi, BARRIER)) {
188 nilfs_warning(sbi->s_super, __func__, 183 err = __sync_dirty_buffer(nilfs->ns_sbh[0],
189 "barrier-based sync failed. " 184 WRITE_SYNC | WRITE_BARRIER);
190 "disabling barriers\n"); 185 if (err == -EOPNOTSUPP) {
191 nilfs_clear_opt(sbi, BARRIER); 186 nilfs_warning(sbi->s_super, __func__,
192 barrier_done = 0; 187 "barrier-based sync failed. "
193 clear_buffer_ordered(nilfs->ns_sbh[0]); 188 "disabling barriers\n");
194 goto retry; 189 nilfs_clear_opt(sbi, BARRIER);
190 goto retry;
191 }
192 } else {
193 err = sync_dirty_buffer(nilfs->ns_sbh[0]);
195 } 194 }
195
196 if (unlikely(err)) { 196 if (unlikely(err)) {
197 printk(KERN_ERR 197 printk(KERN_ERR
198 "NILFS: unable to write superblock (err=%d)\n", err); 198 "NILFS: unable to write superblock (err=%d)\n", err);
diff --git a/fs/open.c b/fs/open.c
index 630715f9f73d..d74e1983e8dc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
675 f->f_path.mnt = mnt; 675 f->f_path.mnt = mnt;
676 f->f_pos = 0; 676 f->f_pos = 0;
677 f->f_op = fops_get(inode->i_fop); 677 f->f_op = fops_get(inode->i_fop);
678 file_move(f, &inode->i_sb->s_files); 678 file_sb_list_add(f, inode->i_sb);
679 679
680 error = security_dentry_open(f, cred); 680 error = security_dentry_open(f, cred);
681 if (error) 681 if (error)
@@ -721,7 +721,7 @@ cleanup_all:
721 mnt_drop_write(mnt); 721 mnt_drop_write(mnt);
722 } 722 }
723 } 723 }
724 file_kill(f); 724 file_sb_list_del(f);
725 f->f_path.dentry = NULL; 725 f->f_path.dentry = NULL;
726 f->f_path.mnt = NULL; 726 f->f_path.mnt = NULL;
727cleanup_file: 727cleanup_file:
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a83149..8066b8dd748f 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt)
126 return 0; 126 return 0;
127} 127}
128 128
129/*
130 * vfsmount lock must be held for write
131 */
129void change_mnt_propagation(struct vfsmount *mnt, int type) 132void change_mnt_propagation(struct vfsmount *mnt, int type)
130{ 133{
131 if (type == MS_SHARED) { 134 if (type == MS_SHARED) {
@@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
270 prev_src_mnt = child; 273 prev_src_mnt = child;
271 } 274 }
272out: 275out:
273 spin_lock(&vfsmount_lock); 276 br_write_lock(vfsmount_lock);
274 while (!list_empty(&tmp_list)) { 277 while (!list_empty(&tmp_list)) {
275 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); 278 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash);
276 umount_tree(child, 0, &umount_list); 279 umount_tree(child, 0, &umount_list);
277 } 280 }
278 spin_unlock(&vfsmount_lock); 281 br_write_unlock(vfsmount_lock);
279 release_mounts(&umount_list); 282 release_mounts(&umount_list);
280 return ret; 283 return ret;
281} 284}
@@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
296 * other mounts its parent propagates to. 299 * other mounts its parent propagates to.
297 * Check if any of these mounts that **do not have submounts** 300 * Check if any of these mounts that **do not have submounts**
298 * have more references than 'refcnt'. If so return busy. 301 * have more references than 'refcnt'. If so return busy.
302 *
303 * vfsmount lock must be held for read or write
299 */ 304 */
300int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 305int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
301{ 306{
@@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt)
353 * collect all mounts that receive propagation from the mount in @list, 358 * collect all mounts that receive propagation from the mount in @list,
354 * and return these additional mounts in the same list. 359 * and return these additional mounts in the same list.
355 * @list: the list of mounts to be unmounted. 360 * @list: the list of mounts to be unmounted.
361 *
362 * vfsmount lock must be held for write
356 */ 363 */
357int propagate_umount(struct list_head *list) 364int propagate_umount(struct list_head *list)
358{ 365{
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ae35413dcbe1..caa758377d66 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode)
83 dquot_drop(inode); 83 dquot_drop(inode);
84 inode->i_blocks = 0; 84 inode->i_blocks = 0;
85 reiserfs_write_unlock_once(inode->i_sb, depth); 85 reiserfs_write_unlock_once(inode->i_sb, depth);
86 return;
86 87
87no_delete: 88no_delete:
88 end_writeback(inode); 89 end_writeback(inode);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1ec952b1f036..812e2c05aa29 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb,
2311 /* flush out the real blocks */ 2311 /* flush out the real blocks */
2312 for (i = 0; i < get_desc_trans_len(desc); i++) { 2312 for (i = 0; i < get_desc_trans_len(desc); i++) {
2313 set_buffer_dirty(real_blocks[i]); 2313 set_buffer_dirty(real_blocks[i]);
2314 ll_rw_block(SWRITE, 1, real_blocks + i); 2314 write_dirty_buffer(real_blocks[i], WRITE);
2315 } 2315 }
2316 for (i = 0; i < get_desc_trans_len(desc); i++) { 2316 for (i = 0; i < get_desc_trans_len(desc); i++) {
2317 wait_on_buffer(real_blocks[i]); 2317 wait_on_buffer(real_blocks[i]);
diff --git a/fs/super.c b/fs/super.c
index 9674ab2c8718..8819e3a7ff20 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
54 s = NULL; 54 s = NULL;
55 goto out; 55 goto out;
56 } 56 }
57#ifdef CONFIG_SMP
58 s->s_files = alloc_percpu(struct list_head);
59 if (!s->s_files) {
60 security_sb_free(s);
61 kfree(s);
62 s = NULL;
63 goto out;
64 } else {
65 int i;
66
67 for_each_possible_cpu(i)
68 INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
69 }
70#else
57 INIT_LIST_HEAD(&s->s_files); 71 INIT_LIST_HEAD(&s->s_files);
72#endif
58 INIT_LIST_HEAD(&s->s_instances); 73 INIT_LIST_HEAD(&s->s_instances);
59 INIT_HLIST_HEAD(&s->s_anon); 74 INIT_HLIST_HEAD(&s->s_anon);
60 INIT_LIST_HEAD(&s->s_inodes); 75 INIT_LIST_HEAD(&s->s_inodes);
@@ -108,6 +123,9 @@ out:
108 */ 123 */
109static inline void destroy_super(struct super_block *s) 124static inline void destroy_super(struct super_block *s)
110{ 125{
126#ifdef CONFIG_SMP
127 free_percpu(s->s_files);
128#endif
111 security_sb_free(s); 129 security_sb_free(s);
112 kfree(s->s_subtype); 130 kfree(s->s_subtype);
113 kfree(s->s_options); 131 kfree(s->s_options);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 048484fb10d2..46f7a807bbc1 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
114 114
115 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 115 ubh_mark_buffer_dirty (USPI_UBH(uspi));
116 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 116 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
117 if (sb->s_flags & MS_SYNCHRONOUS) { 117 if (sb->s_flags & MS_SYNCHRONOUS)
118 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 118 ubh_sync_block(UCPI_UBH(ucpi));
119 ubh_wait_on_buffer (UCPI_UBH(ucpi));
120 }
121 sb->s_dirt = 1; 119 sb->s_dirt = 1;
122 120
123 unlock_super (sb); 121 unlock_super (sb);
@@ -207,10 +205,8 @@ do_more:
207 205
208 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 206 ubh_mark_buffer_dirty (USPI_UBH(uspi));
209 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 207 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
210 if (sb->s_flags & MS_SYNCHRONOUS) { 208 if (sb->s_flags & MS_SYNCHRONOUS)
211 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 209 ubh_sync_block(UCPI_UBH(ucpi));
212 ubh_wait_on_buffer (UCPI_UBH(ucpi));
213 }
214 210
215 if (overflow) { 211 if (overflow) {
216 fragment += count; 212 fragment += count;
@@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
558 554
559 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 555 ubh_mark_buffer_dirty (USPI_UBH(uspi));
560 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 556 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
561 if (sb->s_flags & MS_SYNCHRONOUS) { 557 if (sb->s_flags & MS_SYNCHRONOUS)
562 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 558 ubh_sync_block(UCPI_UBH(ucpi));
563 ubh_wait_on_buffer (UCPI_UBH(ucpi));
564 }
565 sb->s_dirt = 1; 559 sb->s_dirt = 1;
566 560
567 UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); 561 UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
@@ -680,10 +674,8 @@ cg_found:
680succed: 674succed:
681 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 675 ubh_mark_buffer_dirty (USPI_UBH(uspi));
682 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 676 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
683 if (sb->s_flags & MS_SYNCHRONOUS) { 677 if (sb->s_flags & MS_SYNCHRONOUS)
684 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 678 ubh_sync_block(UCPI_UBH(ucpi));
685 ubh_wait_on_buffer (UCPI_UBH(ucpi));
686 }
687 sb->s_dirt = 1; 679 sb->s_dirt = 1;
688 680
689 result += cgno * uspi->s_fpg; 681 result += cgno * uspi->s_fpg;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 428017e018fe..2eabf04af3de 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode)
113 113
114 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 114 ubh_mark_buffer_dirty (USPI_UBH(uspi));
115 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 115 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
116 if (sb->s_flags & MS_SYNCHRONOUS) { 116 if (sb->s_flags & MS_SYNCHRONOUS)
117 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 117 ubh_sync_block(UCPI_UBH(ucpi));
118 ubh_wait_on_buffer (UCPI_UBH(ucpi));
119 }
120 118
121 sb->s_dirt = 1; 119 sb->s_dirt = 1;
122 unlock_super (sb); 120 unlock_super (sb);
@@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
156 154
157 fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); 155 fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
158 ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); 156 ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
159 if (sb->s_flags & MS_SYNCHRONOUS) { 157 if (sb->s_flags & MS_SYNCHRONOUS)
160 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 158 ubh_sync_block(UCPI_UBH(ucpi));
161 ubh_wait_on_buffer(UCPI_UBH(ucpi));
162 }
163 159
164 UFSD("EXIT\n"); 160 UFSD("EXIT\n");
165} 161}
@@ -290,10 +286,8 @@ cg_found:
290 } 286 }
291 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 287 ubh_mark_buffer_dirty (USPI_UBH(uspi));
292 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 288 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
293 if (sb->s_flags & MS_SYNCHRONOUS) { 289 if (sb->s_flags & MS_SYNCHRONOUS)
294 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 290 ubh_sync_block(UCPI_UBH(ucpi));
295 ubh_wait_on_buffer (UCPI_UBH(ucpi));
296 }
297 sb->s_dirt = 1; 291 sb->s_dirt = 1;
298 292
299 inode->i_ino = cg * uspi->s_ipg + bit; 293 inode->i_ino = cg * uspi->s_ipg + bit;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 34d5cb135320..a58f9155fc9a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
243 ubh_bforget(ind_ubh); 243 ubh_bforget(ind_ubh);
244 ind_ubh = NULL; 244 ind_ubh = NULL;
245 } 245 }
246 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { 246 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh))
247 ubh_ll_rw_block(SWRITE, ind_ubh); 247 ubh_sync_block(ind_ubh);
248 ubh_wait_on_buffer (ind_ubh);
249 }
250 ubh_brelse (ind_ubh); 248 ubh_brelse (ind_ubh);
251 249
252 UFSD("EXIT: ino %lu\n", inode->i_ino); 250 UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
307 ubh_bforget(dind_bh); 305 ubh_bforget(dind_bh);
308 dind_bh = NULL; 306 dind_bh = NULL;
309 } 307 }
310 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { 308 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh))
311 ubh_ll_rw_block(SWRITE, dind_bh); 309 ubh_sync_block(dind_bh);
312 ubh_wait_on_buffer (dind_bh);
313 }
314 ubh_brelse (dind_bh); 310 ubh_brelse (dind_bh);
315 311
316 UFSD("EXIT: ino %lu\n", inode->i_ino); 312 UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode)
367 ubh_bforget(tind_bh); 363 ubh_bforget(tind_bh);
368 tind_bh = NULL; 364 tind_bh = NULL;
369 } 365 }
370 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { 366 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh))
371 ubh_ll_rw_block(SWRITE, tind_bh); 367 ubh_sync_block(tind_bh);
372 ubh_wait_on_buffer (tind_bh);
373 }
374 ubh_brelse (tind_bh); 368 ubh_brelse (tind_bh);
375 369
376 UFSD("EXIT: ino %lu\n", inode->i_ino); 370 UFSD("EXIT: ino %lu\n", inode->i_ino);
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 85a7fc9e4a4e..d2c36d53fe66 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag)
113 } 113 }
114} 114}
115 115
116void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) 116void ubh_sync_block(struct ufs_buffer_head *ubh)
117{ 117{
118 if (!ubh) 118 if (ubh) {
119 return; 119 unsigned i;
120 120
121 ll_rw_block(rw, ubh->count, ubh->bh); 121 for (i = 0; i < ubh->count; i++)
122} 122 write_dirty_buffer(ubh->bh[i], WRITE);
123 123
124void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) 124 for (i = 0; i < ubh->count; i++)
125{ 125 wait_on_buffer(ubh->bh[i]);
126 unsigned i; 126 }
127 if (!ubh)
128 return;
129 for ( i = 0; i < ubh->count; i++ )
130 wait_on_buffer (ubh->bh[i]);
131} 127}
132 128
133void ubh_bforget (struct ufs_buffer_head * ubh) 129void ubh_bforget (struct ufs_buffer_head * ubh)
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 0466036912f1..9f8775ce381c 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *);
269extern void ubh_brelse_uspi (struct ufs_sb_private_info *); 269extern void ubh_brelse_uspi (struct ufs_sb_private_info *);
270extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); 270extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *);
271extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); 271extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int);
272extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); 272extern void ubh_sync_block(struct ufs_buffer_head *);
273extern void ubh_wait_on_buffer (struct ufs_buffer_head *);
274extern void ubh_bforget (struct ufs_buffer_head *); 273extern void ubh_bforget (struct ufs_buffer_head *);
275extern int ubh_buffer_dirty (struct ufs_buffer_head *); 274extern int ubh_buffer_dirty (struct ufs_buffer_head *);
276#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) 275#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size)
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 43e649a72529..ec94c12f21da 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -32,7 +32,6 @@ enum bh_state_bits {
32 BH_Delay, /* Buffer is not yet allocated on disk */ 32 BH_Delay, /* Buffer is not yet allocated on disk */
33 BH_Boundary, /* Block is followed by a discontiguity */ 33 BH_Boundary, /* Block is followed by a discontiguity */
34 BH_Write_EIO, /* I/O error on write */ 34 BH_Write_EIO, /* I/O error on write */
35 BH_Ordered, /* ordered write */
36 BH_Eopnotsupp, /* operation not supported (barrier) */ 35 BH_Eopnotsupp, /* operation not supported (barrier) */
37 BH_Unwritten, /* Buffer is allocated on disk but not written */ 36 BH_Unwritten, /* Buffer is allocated on disk but not written */
38 BH_Quiet, /* Buffer Error Prinks to be quiet */ 37 BH_Quiet, /* Buffer Error Prinks to be quiet */
@@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write)
125BUFFER_FNS(Delay, delay) 124BUFFER_FNS(Delay, delay)
126BUFFER_FNS(Boundary, boundary) 125BUFFER_FNS(Boundary, boundary)
127BUFFER_FNS(Write_EIO, write_io_error) 126BUFFER_FNS(Write_EIO, write_io_error)
128BUFFER_FNS(Ordered, ordered)
129BUFFER_FNS(Eopnotsupp, eopnotsupp) 127BUFFER_FNS(Eopnotsupp, eopnotsupp)
130BUFFER_FNS(Unwritten, unwritten) 128BUFFER_FNS(Unwritten, unwritten)
131 129
@@ -183,6 +181,8 @@ void unlock_buffer(struct buffer_head *bh);
183void __lock_buffer(struct buffer_head *bh); 181void __lock_buffer(struct buffer_head *bh);
184void ll_rw_block(int, int, struct buffer_head * bh[]); 182void ll_rw_block(int, int, struct buffer_head * bh[]);
185int sync_dirty_buffer(struct buffer_head *bh); 183int sync_dirty_buffer(struct buffer_head *bh);
184int __sync_dirty_buffer(struct buffer_head *bh, int rw);
185void write_dirty_buffer(struct buffer_head *bh, int rw);
186int submit_bh(int, struct buffer_head *); 186int submit_bh(int, struct buffer_head *);
187void write_boundary_block(struct block_device *bdev, 187void write_boundary_block(struct block_device *bdev,
188 sector_t bblock, unsigned blocksize); 188 sector_t bblock, unsigned blocksize);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9a96b4d83fc1..76041b614758 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -125,9 +125,6 @@ struct inodes_stat_t {
125 * block layer could (in theory) choose to ignore this 125 * block layer could (in theory) choose to ignore this
126 * request if it runs into resource problems. 126 * request if it runs into resource problems.
127 * WRITE A normal async write. Device will be plugged. 127 * WRITE A normal async write. Device will be plugged.
128 * SWRITE Like WRITE, but a special case for ll_rw_block() that
129 * tells it to lock the buffer first. Normally a buffer
130 * must be locked before doing IO.
131 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down 128 * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down
132 * the hint that someone will be waiting on this IO 129 * the hint that someone will be waiting on this IO
133 * shortly. The device must still be unplugged explicitly, 130 * shortly. The device must still be unplugged explicitly,
@@ -138,9 +135,6 @@ struct inodes_stat_t {
138 * immediately after submission. The write equivalent 135 * immediately after submission. The write equivalent
139 * of READ_SYNC. 136 * of READ_SYNC.
140 * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. 137 * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
141 * SWRITE_SYNC
142 * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
143 * See SWRITE.
144 * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all 138 * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all
145 * previously submitted writes must be safely on storage 139 * previously submitted writes must be safely on storage
146 * before this one is started. Also guarantees that when 140 * before this one is started. Also guarantees that when
@@ -155,7 +149,6 @@ struct inodes_stat_t {
155#define READ 0 149#define READ 0
156#define WRITE RW_MASK 150#define WRITE RW_MASK
157#define READA RWA_MASK 151#define READA RWA_MASK
158#define SWRITE (WRITE | READA)
159 152
160#define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) 153#define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG)
161#define READ_META (READ | REQ_META) 154#define READ_META (READ | REQ_META)
@@ -165,8 +158,6 @@ struct inodes_stat_t {
165#define WRITE_META (WRITE | REQ_META) 158#define WRITE_META (WRITE | REQ_META)
166#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ 159#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
167 REQ_HARDBARRIER) 160 REQ_HARDBARRIER)
168#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE)
169#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
170 161
171/* 162/*
172 * These aren't really reads or writes, they pass down information about 163 * These aren't really reads or writes, they pass down information about
@@ -929,6 +920,9 @@ struct file {
929#define f_vfsmnt f_path.mnt 920#define f_vfsmnt f_path.mnt
930 const struct file_operations *f_op; 921 const struct file_operations *f_op;
931 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ 922 spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
923#ifdef CONFIG_SMP
924 int f_sb_list_cpu;
925#endif
932 atomic_long_t f_count; 926 atomic_long_t f_count;
933 unsigned int f_flags; 927 unsigned int f_flags;
934 fmode_t f_mode; 928 fmode_t f_mode;
@@ -953,9 +947,6 @@ struct file {
953 unsigned long f_mnt_write_state; 947 unsigned long f_mnt_write_state;
954#endif 948#endif
955}; 949};
956extern spinlock_t files_lock;
957#define file_list_lock() spin_lock(&files_lock);
958#define file_list_unlock() spin_unlock(&files_lock);
959 950
960#define get_file(x) atomic_long_inc(&(x)->f_count) 951#define get_file(x) atomic_long_inc(&(x)->f_count)
961#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 952#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
@@ -1346,7 +1337,11 @@ struct super_block {
1346 1337
1347 struct list_head s_inodes; /* all inodes */ 1338 struct list_head s_inodes; /* all inodes */
1348 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ 1339 struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
1340#ifdef CONFIG_SMP
1341 struct list_head __percpu *s_files;
1342#else
1349 struct list_head s_files; 1343 struct list_head s_files;
1344#endif
1350 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ 1345 /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
1351 struct list_head s_dentry_lru; /* unused dentry lru */ 1346 struct list_head s_dentry_lru; /* unused dentry lru */
1352 int s_nr_dentry_unused; /* # of dentry on lru */ 1347 int s_nr_dentry_unused; /* # of dentry on lru */
@@ -2197,8 +2192,6 @@ static inline void insert_inode_hash(struct inode *inode) {
2197 __insert_inode_hash(inode, inode->i_ino); 2192 __insert_inode_hash(inode, inode->i_ino);
2198} 2193}
2199 2194
2200extern void file_move(struct file *f, struct list_head *list);
2201extern void file_kill(struct file *f);
2202#ifdef CONFIG_BLOCK 2195#ifdef CONFIG_BLOCK
2203extern void submit_bio(int, struct bio *); 2196extern void submit_bio(int, struct bio *);
2204extern int bdev_read_only(struct block_device *); 2197extern int bdev_read_only(struct block_device *);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index eca3d5202138..a42b5bf02f8b 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -5,7 +5,7 @@
5 5
6struct fs_struct { 6struct fs_struct {
7 int users; 7 int users;
8 rwlock_t lock; 8 spinlock_t lock;
9 int umask; 9 int umask;
10 int in_exec; 10 int in_exec;
11 struct path root, pwd; 11 struct path root, pwd;
@@ -23,29 +23,29 @@ extern int unshare_fs_struct(void);
23 23
24static inline void get_fs_root(struct fs_struct *fs, struct path *root) 24static inline void get_fs_root(struct fs_struct *fs, struct path *root)
25{ 25{
26 read_lock(&fs->lock); 26 spin_lock(&fs->lock);
27 *root = fs->root; 27 *root = fs->root;
28 path_get(root); 28 path_get(root);
29 read_unlock(&fs->lock); 29 spin_unlock(&fs->lock);
30} 30}
31 31
32static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) 32static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
33{ 33{
34 read_lock(&fs->lock); 34 spin_lock(&fs->lock);
35 *pwd = fs->pwd; 35 *pwd = fs->pwd;
36 path_get(pwd); 36 path_get(pwd);
37 read_unlock(&fs->lock); 37 spin_unlock(&fs->lock);
38} 38}
39 39
40static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, 40static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root,
41 struct path *pwd) 41 struct path *pwd)
42{ 42{
43 read_lock(&fs->lock); 43 spin_lock(&fs->lock);
44 *root = fs->root; 44 *root = fs->root;
45 path_get(root); 45 path_get(root);
46 *pwd = fs->pwd; 46 *pwd = fs->pwd;
47 path_get(pwd); 47 path_get(pwd);
48 read_unlock(&fs->lock); 48 spin_unlock(&fs->lock);
49} 49}
50 50
51#endif /* _LINUX_FS_STRUCT_H */ 51#endif /* _LINUX_FS_STRUCT_H */
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
new file mode 100644
index 000000000000..b288cb713b90
--- /dev/null
+++ b/include/linux/lglock.h
@@ -0,0 +1,172 @@
1/*
2 * Specialised local-global spinlock. Can only be declared as global variables
3 * to avoid overhead and keep things simple (and we don't want to start using
4 * these inside dynamically allocated structures).
5 *
6 * "local/global locks" (lglocks) can be used to:
7 *
8 * - Provide fast exclusive access to per-CPU data, with exclusive access to
9 * another CPU's data allowed but possibly subject to contention, and to
10 * provide very slow exclusive access to all per-CPU data.
11 * - Or to provide very fast and scalable read serialisation, and to provide
12 * very slow exclusive serialisation of data (not necessarily per-CPU data).
13 *
14 * Brlocks are also implemented as a short-hand notation for the latter use
15 * case.
16 *
17 * Copyright 2009, 2010, Nick Piggin, Novell Inc.
18 */
19#ifndef __LINUX_LGLOCK_H
20#define __LINUX_LGLOCK_H
21
22#include <linux/spinlock.h>
23#include <linux/lockdep.h>
24#include <linux/percpu.h>
25
26/* can make br locks by using local lock for read side, global lock for write */
27#define br_lock_init(name) name##_lock_init()
28#define br_read_lock(name) name##_local_lock()
29#define br_read_unlock(name) name##_local_unlock()
30#define br_write_lock(name) name##_global_lock_online()
31#define br_write_unlock(name) name##_global_unlock_online()
32
33#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name)
34#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name)
35
36
37#define lg_lock_init(name) name##_lock_init()
38#define lg_local_lock(name) name##_local_lock()
39#define lg_local_unlock(name) name##_local_unlock()
40#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu)
41#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu)
42#define lg_global_lock(name) name##_global_lock()
43#define lg_global_unlock(name) name##_global_unlock()
44#define lg_global_lock_online(name) name##_global_lock_online()
45#define lg_global_unlock_online(name) name##_global_unlock_online()
46
47#ifdef CONFIG_DEBUG_LOCK_ALLOC
48#define LOCKDEP_INIT_MAP lockdep_init_map
49
50#define DEFINE_LGLOCK_LOCKDEP(name) \
51 struct lock_class_key name##_lock_key; \
52 struct lockdep_map name##_lock_dep_map; \
53 EXPORT_SYMBOL(name##_lock_dep_map)
54
55#else
56#define LOCKDEP_INIT_MAP(a, b, c, d)
57
58#define DEFINE_LGLOCK_LOCKDEP(name)
59#endif
60
61
62#define DECLARE_LGLOCK(name) \
63 extern void name##_lock_init(void); \
64 extern void name##_local_lock(void); \
65 extern void name##_local_unlock(void); \
66 extern void name##_local_lock_cpu(int cpu); \
67 extern void name##_local_unlock_cpu(int cpu); \
68 extern void name##_global_lock(void); \
69 extern void name##_global_unlock(void); \
70 extern void name##_global_lock_online(void); \
71 extern void name##_global_unlock_online(void); \
72
73#define DEFINE_LGLOCK(name) \
74 \
75 DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \
76 DEFINE_LGLOCK_LOCKDEP(name); \
77 \
78 void name##_lock_init(void) { \
79 int i; \
80 LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
81 for_each_possible_cpu(i) { \
82 arch_spinlock_t *lock; \
83 lock = &per_cpu(name##_lock, i); \
84 *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \
85 } \
86 } \
87 EXPORT_SYMBOL(name##_lock_init); \
88 \
89 void name##_local_lock(void) { \
90 arch_spinlock_t *lock; \
91 preempt_disable(); \
92 rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
93 lock = &__get_cpu_var(name##_lock); \
94 arch_spin_lock(lock); \
95 } \
96 EXPORT_SYMBOL(name##_local_lock); \
97 \
98 void name##_local_unlock(void) { \
99 arch_spinlock_t *lock; \
100 rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
101 lock = &__get_cpu_var(name##_lock); \
102 arch_spin_unlock(lock); \
103 preempt_enable(); \
104 } \
105 EXPORT_SYMBOL(name##_local_unlock); \
106 \
107 void name##_local_lock_cpu(int cpu) { \
108 arch_spinlock_t *lock; \
109 preempt_disable(); \
110 rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
111 lock = &per_cpu(name##_lock, cpu); \
112 arch_spin_lock(lock); \
113 } \
114 EXPORT_SYMBOL(name##_local_lock_cpu); \
115 \
116 void name##_local_unlock_cpu(int cpu) { \
117 arch_spinlock_t *lock; \
118 rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
119 lock = &per_cpu(name##_lock, cpu); \
120 arch_spin_unlock(lock); \
121 preempt_enable(); \
122 } \
123 EXPORT_SYMBOL(name##_local_unlock_cpu); \
124 \
125 void name##_global_lock_online(void) { \
126 int i; \
127 preempt_disable(); \
128 rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
129 for_each_online_cpu(i) { \
130 arch_spinlock_t *lock; \
131 lock = &per_cpu(name##_lock, i); \
132 arch_spin_lock(lock); \
133 } \
134 } \
135 EXPORT_SYMBOL(name##_global_lock_online); \
136 \
137 void name##_global_unlock_online(void) { \
138 int i; \
139 rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
140 for_each_online_cpu(i) { \
141 arch_spinlock_t *lock; \
142 lock = &per_cpu(name##_lock, i); \
143 arch_spin_unlock(lock); \
144 } \
145 preempt_enable(); \
146 } \
147 EXPORT_SYMBOL(name##_global_unlock_online); \
148 \
149 void name##_global_lock(void) { \
150 int i; \
151 preempt_disable(); \
152 rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
153 for_each_online_cpu(i) { \
154 arch_spinlock_t *lock; \
155 lock = &per_cpu(name##_lock, i); \
156 arch_spin_lock(lock); \
157 } \
158 } \
159 EXPORT_SYMBOL(name##_global_lock); \
160 \
161 void name##_global_unlock(void) { \
162 int i; \
163 rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
164 for_each_online_cpu(i) { \
165 arch_spinlock_t *lock; \
166 lock = &per_cpu(name##_lock, i); \
167 arch_spin_unlock(lock); \
168 } \
169 preempt_enable(); \
170 } \
171 EXPORT_SYMBOL(name##_global_unlock);
172#endif
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1437da3ddc62..67d64e6efe7a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -329,6 +329,13 @@ struct tty_struct {
329 struct tty_port *port; 329 struct tty_port *port;
330}; 330};
331 331
332/* Each of a tty's open files has private_data pointing to tty_file_private */
333struct tty_file_private {
334 struct tty_struct *tty;
335 struct file *file;
336 struct list_head list;
337};
338
332/* tty magic number */ 339/* tty magic number */
333#define TTY_MAGIC 0x5401 340#define TTY_MAGIC 0x5401
334 341
@@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p);
458extern struct tty_struct *get_current_tty(void); 465extern struct tty_struct *get_current_tty(void);
459extern void tty_default_fops(struct file_operations *fops); 466extern void tty_default_fops(struct file_operations *fops);
460extern struct tty_struct *alloc_tty_struct(void); 467extern struct tty_struct *alloc_tty_struct(void);
468extern void tty_add_file(struct tty_struct *tty, struct file *file);
461extern void free_tty_struct(struct tty_struct *tty); 469extern void free_tty_struct(struct tty_struct *tty);
462extern void initialize_tty_struct(struct tty_struct *tty, 470extern void initialize_tty_struct(struct tty_struct *tty,
463 struct tty_driver *driver, int idx); 471 struct tty_driver *driver, int idx);
@@ -470,6 +478,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
470extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); 478extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
471 479
472extern struct mutex tty_mutex; 480extern struct mutex tty_mutex;
481extern spinlock_t tty_files_lock;
473 482
474extern void tty_write_unlock(struct tty_struct *tty); 483extern void tty_write_unlock(struct tty_struct *tty);
475extern int tty_write_lock(struct tty_struct *tty, int ndelay); 484extern int tty_write_lock(struct tty_struct *tty, int ndelay);
diff --git a/kernel/fork.c b/kernel/fork.c
index 98b450876f93..856eac3ec52e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
752 struct fs_struct *fs = current->fs; 752 struct fs_struct *fs = current->fs;
753 if (clone_flags & CLONE_FS) { 753 if (clone_flags & CLONE_FS) {
754 /* tsk->fs is already what we want */ 754 /* tsk->fs is already what we want */
755 write_lock(&fs->lock); 755 spin_lock(&fs->lock);
756 if (fs->in_exec) { 756 if (fs->in_exec) {
757 write_unlock(&fs->lock); 757 spin_unlock(&fs->lock);
758 return -EAGAIN; 758 return -EAGAIN;
759 } 759 }
760 fs->users++; 760 fs->users++;
761 write_unlock(&fs->lock); 761 spin_unlock(&fs->lock);
762 return 0; 762 return 0;
763 } 763 }
764 tsk->fs = copy_fs_struct(fs); 764 tsk->fs = copy_fs_struct(fs);
@@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1676 1676
1677 if (new_fs) { 1677 if (new_fs) {
1678 fs = current->fs; 1678 fs = current->fs;
1679 write_lock(&fs->lock); 1679 spin_lock(&fs->lock);
1680 current->fs = new_fs; 1680 current->fs = new_fs;
1681 if (--fs->users) 1681 if (--fs->users)
1682 new_fs = NULL; 1682 new_fs = NULL;
1683 else 1683 else
1684 new_fs = fs; 1684 new_fs = fs;
1685 write_unlock(&fs->lock); 1685 spin_unlock(&fs->lock);
1686 } 1686 }
1687 1687
1688 if (new_mm) { 1688 if (new_mm) {
diff --git a/security/apparmor/path.c b/security/apparmor/path.c
index 96bab9469d48..19358dc14605 100644
--- a/security/apparmor/path.c
+++ b/security/apparmor/path.c
@@ -62,19 +62,14 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
62 int deleted, connected; 62 int deleted, connected;
63 int error = 0; 63 int error = 0;
64 64
65 /* Get the root we want to resolve too */ 65 /* Get the root we want to resolve too, released below */
66 if (flags & PATH_CHROOT_REL) { 66 if (flags & PATH_CHROOT_REL) {
67 /* resolve paths relative to chroot */ 67 /* resolve paths relative to chroot */
68 read_lock(&current->fs->lock); 68 get_fs_root(current->fs, &root);
69 root = current->fs->root;
70 /* released below */
71 path_get(&root);
72 read_unlock(&current->fs->lock);
73 } else { 69 } else {
74 /* resolve paths relative to namespace */ 70 /* resolve paths relative to namespace */
75 root.mnt = current->nsproxy->mnt_ns->root; 71 root.mnt = current->nsproxy->mnt_ns->root;
76 root.dentry = root.mnt->mnt_root; 72 root.dentry = root.mnt->mnt_root;
77 /* released below */
78 path_get(&root); 73 path_get(&root);
79 } 74 }
80 75
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 42043f96e54f..4796ddd4e721 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2170,8 +2170,9 @@ static inline void flush_unauthorized_files(const struct cred *cred,
2170 2170
2171 tty = get_current_tty(); 2171 tty = get_current_tty();
2172 if (tty) { 2172 if (tty) {
2173 file_list_lock(); 2173 spin_lock(&tty_files_lock);
2174 if (!list_empty(&tty->tty_files)) { 2174 if (!list_empty(&tty->tty_files)) {
2175 struct tty_file_private *file_priv;
2175 struct inode *inode; 2176 struct inode *inode;
2176 2177
2177 /* Revalidate access to controlling tty. 2178 /* Revalidate access to controlling tty.
@@ -2179,14 +2180,16 @@ static inline void flush_unauthorized_files(const struct cred *cred,
2179 than using file_has_perm, as this particular open 2180 than using file_has_perm, as this particular open
2180 file may belong to another process and we are only 2181 file may belong to another process and we are only
2181 interested in the inode-based check here. */ 2182 interested in the inode-based check here. */
2182 file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); 2183 file_priv = list_first_entry(&tty->tty_files,
2184 struct tty_file_private, list);
2185 file = file_priv->file;
2183 inode = file->f_path.dentry->d_inode; 2186 inode = file->f_path.dentry->d_inode;
2184 if (inode_has_perm(cred, inode, 2187 if (inode_has_perm(cred, inode,
2185 FILE__READ | FILE__WRITE, NULL)) { 2188 FILE__READ | FILE__WRITE, NULL)) {
2186 drop_tty = 1; 2189 drop_tty = 1;
2187 } 2190 }
2188 } 2191 }
2189 file_list_unlock(); 2192 spin_unlock(&tty_files_lock);
2190 tty_kref_put(tty); 2193 tty_kref_put(tty);
2191 } 2194 }
2192 /* Reset controlling tty. */ 2195 /* Reset controlling tty. */