author     Linus Torvalds <torvalds@linux-foundation.org>  2010-08-18 12:35:08 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2010-08-18 12:35:08 -0400
commit     145c3ae46b37993b0debb0b3da6256daea4a6ec5 (patch)
tree       0dbff382ce36b23b3d2dbff87d3eaab73a07a2a4 /fs
parent     81ca03a0e2ea0207b2df80e0edcf4c775c07a505 (diff)
parent     99b7db7b8ffd6bb755eb0a175596421a0b581cb2 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  fs: brlock vfsmount_lock
  fs: scale files_lock
  lglock: introduce special lglock and brlock spin locks
  tty: fix fu_list abuse
  fs: cleanup files_lock locking
  fs: remove extra lookup in __lookup_hash
  fs: fs_struct rwlock to spinlock
  apparmor: use task path helpers
  fs: dentry allocation consolidation
  fs: fix do_lookup false negative
  mbcache: Limit the maximum number of cache entries
  hostfs ->follow_link() braino
  hostfs: dumb (and usually harmless) tpyo - strncpy instead of strlcpy
  remove SWRITE* I/O types
  kill BH_Ordered flag
  vfs: update ctime when changing the file's permission by setfacl
  cramfs: only unlock new inodes
  fix reiserfs_evict_inode end_writeback second call
Diffstat (limited to 'fs')
-rw-r--r--  fs/buffer.c              69
-rw-r--r--  fs/cramfs/inode.c         2
-rw-r--r--  fs/dcache.c              71
-rw-r--r--  fs/exec.c                 4
-rw-r--r--  fs/fat/misc.c             4
-rw-r--r--  fs/file_table.c         124
-rw-r--r--  fs/fs_struct.c           32
-rw-r--r--  fs/generic_acl.c          1
-rw-r--r--  fs/hostfs/hostfs_kern.c   4
-rw-r--r--  fs/internal.h             7
-rw-r--r--  fs/jbd/checkpoint.c       4
-rw-r--r--  fs/jbd/commit.c          49
-rw-r--r--  fs/jbd/journal.c          2
-rw-r--r--  fs/jbd/revoke.c           2
-rw-r--r--  fs/jbd2/checkpoint.c      4
-rw-r--r--  fs/jbd2/commit.c         39
-rw-r--r--  fs/jbd2/journal.c         2
-rw-r--r--  fs/jbd2/revoke.c          2
-rw-r--r--  fs/mbcache.c             30
-rw-r--r--  fs/namei.c              119
-rw-r--r--  fs/namespace.c          177
-rw-r--r--  fs/nilfs2/super.c        28
-rw-r--r--  fs/open.c                 4
-rw-r--r--  fs/pnode.c               11
-rw-r--r--  fs/reiserfs/inode.c       1
-rw-r--r--  fs/reiserfs/journal.c     2
-rw-r--r--  fs/super.c               18
-rw-r--r--  fs/ufs/balloc.c          24
-rw-r--r--  fs/ufs/ialloc.c          18
-rw-r--r--  fs/ufs/truncate.c        18
-rw-r--r--  fs/ufs/util.c            20
-rw-r--r--  fs/ufs/util.h             3
32 files changed, 518 insertions, 377 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 50efa339e051..3e7dca279d1c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
770 spin_unlock(lock); 770 spin_unlock(lock);
771 /* 771 /*
772 * Ensure any pending I/O completes so that 772 * Ensure any pending I/O completes so that
773 * ll_rw_block() actually writes the current 773 * write_dirty_buffer() actually writes the
774 * contents - it is a noop if I/O is still in 774 * current contents - it is a noop if I/O is
775 * flight on potentially older contents. 775 * still in flight on potentially older
776 * contents.
776 */ 777 */
777 ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); 778 write_dirty_buffer(bh, WRITE_SYNC_PLUG);
778 779
779 /* 780 /*
780 * Kick off IO for the previous mapping. Note 781 * Kick off IO for the previous mapping. Note
@@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
2912 BUG_ON(buffer_unwritten(bh)); 2913 BUG_ON(buffer_unwritten(bh));
2913 2914
2914 /* 2915 /*
2915 * Mask in barrier bit for a write (could be either a WRITE or a
2916 * WRITE_SYNC
2917 */
2918 if (buffer_ordered(bh) && (rw & WRITE))
2919 rw |= WRITE_BARRIER;
2920
2921 /*
2922 * Only clear out a write error when rewriting 2916 * Only clear out a write error when rewriting
2923 */ 2917 */
2924 if (test_set_buffer_req(bh) && (rw & WRITE)) 2918 if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
2956 2950
2957/** 2951/**
2958 * ll_rw_block: low-level access to block devices (DEPRECATED) 2952 * ll_rw_block: low-level access to block devices (DEPRECATED)
2959 * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) 2953 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
2960 * @nr: number of &struct buffer_heads in the array 2954 * @nr: number of &struct buffer_heads in the array
2961 * @bhs: array of pointers to &struct buffer_head 2955 * @bhs: array of pointers to &struct buffer_head
2962 * 2956 *
2963 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and 2957 * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
2964 * requests an I/O operation on them, either a %READ or a %WRITE. The third 2958 * requests an I/O operation on them, either a %READ or a %WRITE. The third
2965 * %SWRITE is like %WRITE only we make sure that the *current* data in buffers 2959 * %READA option is described in the documentation for generic_make_request()
2966 * are sent to disk. The fourth %READA option is described in the documentation 2960 * which ll_rw_block() calls.
2967 * for generic_make_request() which ll_rw_block() calls.
2968 * 2961 *
2969 * This function drops any buffer that it cannot get a lock on (with the 2962 * This function drops any buffer that it cannot get a lock on (with the
2970 * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be 2963 * BH_Lock state bit), any buffer that appears to be clean when doing a write
2971 * clean when doing a write request, and any buffer that appears to be 2964 * request, and any buffer that appears to be up-to-date when doing read
2972 * up-to-date when doing read request. Further it marks as clean buffers that 2965 * request. Further it marks as clean buffers that are processed for
2973 * are processed for writing (the buffer cache won't assume that they are 2966 * writing (the buffer cache won't assume that they are actually clean
2974 * actually clean until the buffer gets unlocked). 2967 * until the buffer gets unlocked).
2975 * 2968 *
2976 * ll_rw_block sets b_end_io to simple completion handler that marks 2969 * ll_rw_block sets b_end_io to simple completion handler that marks
2977 * the buffer up-to-date (if approriate), unlocks the buffer and wakes 2970 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2987 for (i = 0; i < nr; i++) { 2980 for (i = 0; i < nr; i++) {
2988 struct buffer_head *bh = bhs[i]; 2981 struct buffer_head *bh = bhs[i];
2989 2982
2990 if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) 2983 if (!trylock_buffer(bh))
2991 lock_buffer(bh);
2992 else if (!trylock_buffer(bh))
2993 continue; 2984 continue;
2994 2985 if (rw == WRITE) {
2995 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
2996 rw == SWRITE_SYNC_PLUG) {
2997 if (test_clear_buffer_dirty(bh)) { 2986 if (test_clear_buffer_dirty(bh)) {
2998 bh->b_end_io = end_buffer_write_sync; 2987 bh->b_end_io = end_buffer_write_sync;
2999 get_bh(bh); 2988 get_bh(bh);
3000 if (rw == SWRITE_SYNC) 2989 submit_bh(WRITE, bh);
3001 submit_bh(WRITE_SYNC, bh);
3002 else
3003 submit_bh(WRITE, bh);
3004 continue; 2990 continue;
3005 } 2991 }
3006 } else { 2992 } else {
@@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
3016} 3002}
3017EXPORT_SYMBOL(ll_rw_block); 3003EXPORT_SYMBOL(ll_rw_block);
3018 3004
3005void write_dirty_buffer(struct buffer_head *bh, int rw)
3006{
3007 lock_buffer(bh);
3008 if (!test_clear_buffer_dirty(bh)) {
3009 unlock_buffer(bh);
3010 return;
3011 }
3012 bh->b_end_io = end_buffer_write_sync;
3013 get_bh(bh);
3014 submit_bh(rw, bh);
3015}
3016EXPORT_SYMBOL(write_dirty_buffer);
3017
3019/* 3018/*
3020 * For a data-integrity writeout, we need to wait upon any in-progress I/O 3019 * For a data-integrity writeout, we need to wait upon any in-progress I/O
3021 * and then start new I/O and then wait upon it. The caller must have a ref on 3020 * and then start new I/O and then wait upon it. The caller must have a ref on
3022 * the buffer_head. 3021 * the buffer_head.
3023 */ 3022 */
3024int sync_dirty_buffer(struct buffer_head *bh) 3023int __sync_dirty_buffer(struct buffer_head *bh, int rw)
3025{ 3024{
3026 int ret = 0; 3025 int ret = 0;
3027 3026
@@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
3030 if (test_clear_buffer_dirty(bh)) { 3029 if (test_clear_buffer_dirty(bh)) {
3031 get_bh(bh); 3030 get_bh(bh);
3032 bh->b_end_io = end_buffer_write_sync; 3031 bh->b_end_io = end_buffer_write_sync;
3033 ret = submit_bh(WRITE_SYNC, bh); 3032 ret = submit_bh(rw, bh);
3034 wait_on_buffer(bh); 3033 wait_on_buffer(bh);
3035 if (buffer_eopnotsupp(bh)) { 3034 if (buffer_eopnotsupp(bh)) {
3036 clear_buffer_eopnotsupp(bh); 3035 clear_buffer_eopnotsupp(bh);
@@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
3043 } 3042 }
3044 return ret; 3043 return ret;
3045} 3044}
3045EXPORT_SYMBOL(__sync_dirty_buffer);
3046
3047int sync_dirty_buffer(struct buffer_head *bh)
3048{
3049 return __sync_dirty_buffer(bh, WRITE_SYNC);
3050}
3046EXPORT_SYMBOL(sync_dirty_buffer); 3051EXPORT_SYMBOL(sync_dirty_buffer);
3047 3052
3048/* 3053/*
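The fs/buffer.c change above removes the SWRITE* request types and the barrier masking in submit_bh(), and adds two helpers: write_dirty_buffer() for fire-and-forget writeout and __sync_dirty_buffer() for synchronous writeout with a caller-chosen rw flag. A minimal sketch of how a caller that used ll_rw_block(SWRITE, ...) migrates; flush_bh_array() is a hypothetical function, not part of the patch:

#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/buffer_head.h>

/*
 * Hypothetical caller: write out an array of dirty buffers and wait.
 * write_dirty_buffer() always locks the buffer, clears the dirty bit
 * and submits it - the behaviour SWRITE used to provide.
 */
static int flush_bh_array(struct buffer_head **bhs, int nr)
{
	int i, err = 0;

	for (i = 0; i < nr; i++)	/* was: ll_rw_block(SWRITE, nr, bhs); */
		write_dirty_buffer(bhs[i], WRITE);

	for (i = 0; i < nr; i++) {
		wait_on_buffer(bhs[i]);
		if (!buffer_uptodate(bhs[i]))
			err = -EIO;
	}
	return err;
}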
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a53b130b366c..1e7a33028d33 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
80 } 80 }
81 } else { 81 } else {
82 inode = iget_locked(sb, CRAMINO(cramfs_inode)); 82 inode = iget_locked(sb, CRAMINO(cramfs_inode));
83 if (inode) { 83 if (inode && (inode->i_state & I_NEW)) {
84 setup_inode(inode, cramfs_inode); 84 setup_inode(inode, cramfs_inode);
85 unlock_new_inode(inode); 85 unlock_new_inode(inode);
86 } 86 }
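The cramfs fix applies the standard iget_locked() rule: only an inode returned with I_NEW set is freshly allocated and still locked, so only that case may be initialised and handed to unlock_new_inode(). A sketch of the pattern, assuming a hypothetical filesystem; my_iget() and the placeholder mode are illustrative only:

#include <linux/fs.h>
#include <linux/stat.h>

static struct inode *my_iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode = iget_locked(sb, ino);

	if (!inode)
		return NULL;
	if (inode->i_state & I_NEW) {
		/* fresh inode: fill it in, then unlock it */
		inode->i_mode = S_IFREG | 0444;	/* placeholder setup */
		unlock_new_inode(inode);
	}
	/* an inode found in the cache is already set up and unlocked */
	return inode;
}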
diff --git a/fs/dcache.c b/fs/dcache.c
index 4d13bf50b7b1..83293be48149 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci);
1332 * d_lookup - search for a dentry 1332 * d_lookup - search for a dentry
1333 * @parent: parent dentry 1333 * @parent: parent dentry
1334 * @name: qstr of name we wish to find 1334 * @name: qstr of name we wish to find
1335 * Returns: dentry, or NULL
1335 * 1336 *
1336 * Searches the children of the parent dentry for the name in question. If 1337 * d_lookup searches the children of the parent dentry for the name in
1337 * the dentry is found its reference count is incremented and the dentry 1338 * question. If the dentry is found its reference count is incremented and the
1338 * is returned. The caller must use dput to free the entry when it has 1339 * dentry is returned. The caller must use dput to free the entry when it has
1339 * finished using it. %NULL is returned on failure. 1340 * finished using it. %NULL is returned if the dentry does not exist.
1340 *
1341 * __d_lookup is dcache_lock free. The hash list is protected using RCU.
1342 * Memory barriers are used while updating and doing lockless traversal.
1343 * To avoid races with d_move while rename is happening, d_lock is used.
1344 *
1345 * Overflows in memcmp(), while d_move, are avoided by keeping the length
1346 * and name pointer in one structure pointed by d_qstr.
1347 *
1348 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1349 * lookup is going on.
1350 *
1351 * The dentry unused LRU is not updated even if lookup finds the required dentry
1352 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1353 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1354 * acquisition.
1355 *
1356 * d_lookup() is protected against the concurrent renames in some unrelated
1357 * directory using the seqlockt_t rename_lock.
1358 */ 1341 */
1359
1360struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1342struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1361{ 1343{
1362 struct dentry * dentry = NULL; 1344 struct dentry * dentry = NULL;
@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1372} 1354}
1373EXPORT_SYMBOL(d_lookup); 1355EXPORT_SYMBOL(d_lookup);
1374 1356
1357/*
1358 * __d_lookup - search for a dentry (racy)
1359 * @parent: parent dentry
1360 * @name: qstr of name we wish to find
1361 * Returns: dentry, or NULL
1362 *
1363 * __d_lookup is like d_lookup, however it may (rarely) return a
1364 * false-negative result due to unrelated rename activity.
1365 *
1366 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1367 * however it must be used carefully, eg. with a following d_lookup in
1368 * the case of failure.
1369 *
1370 * __d_lookup callers must be commented.
1371 */
1375struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1372struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1376{ 1373{
1377 unsigned int len = name->len; 1374 unsigned int len = name->len;
@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1382 struct hlist_node *node; 1379 struct hlist_node *node;
1383 struct dentry *dentry; 1380 struct dentry *dentry;
1384 1381
1382 /*
1383 * The hash list is protected using RCU.
1384 *
1385 * Take d_lock when comparing a candidate dentry, to avoid races
1386 * with d_move().
1387 *
1388 * It is possible that concurrent renames can mess up our list
1389 * walk here and result in missing our dentry, resulting in the
1390 * false-negative result. d_lookup() protects against concurrent
1391 * renames using rename_lock seqlock.
1392 *
1393 * See Documentation/vfs/dcache-locking.txt for more details.
1394 */
1385 rcu_read_lock(); 1395 rcu_read_lock();
1386 1396
1387 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1397 hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1396 1406
1397 /* 1407 /*
1398 * Recheck the dentry after taking the lock - d_move may have 1408 * Recheck the dentry after taking the lock - d_move may have
1399 * changed things. Don't bother checking the hash because we're 1409 * changed things. Don't bother checking the hash because
1400 * about to compare the whole name anyway. 1410 * we're about to compare the whole name anyway.
1401 */ 1411 */
1402 if (dentry->d_parent != parent) 1412 if (dentry->d_parent != parent)
1403 goto next; 1413 goto next;
@@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root,
1925 bool slash = false; 1935 bool slash = false;
1926 int error = 0; 1936 int error = 0;
1927 1937
1928 spin_lock(&vfsmount_lock); 1938 br_read_lock(vfsmount_lock);
1929 while (dentry != root->dentry || vfsmnt != root->mnt) { 1939 while (dentry != root->dentry || vfsmnt != root->mnt) {
1930 struct dentry * parent; 1940 struct dentry * parent;
1931 1941
@@ -1954,7 +1964,7 @@ out:
1954 if (!error && !slash) 1964 if (!error && !slash)
1955 error = prepend(buffer, buflen, "/", 1); 1965 error = prepend(buffer, buflen, "/", 1);
1956 1966
1957 spin_unlock(&vfsmount_lock); 1967 br_read_unlock(vfsmount_lock);
1958 return error; 1968 return error;
1959 1969
1960global_root: 1970global_root:
@@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2)
2292 struct vfsmount *mnt = path1->mnt; 2302 struct vfsmount *mnt = path1->mnt;
2293 struct dentry *dentry = path1->dentry; 2303 struct dentry *dentry = path1->dentry;
2294 int res; 2304 int res;
2295 spin_lock(&vfsmount_lock); 2305
2306 br_read_lock(vfsmount_lock);
2296 if (mnt != path2->mnt) { 2307 if (mnt != path2->mnt) {
2297 for (;;) { 2308 for (;;) {
2298 if (mnt->mnt_parent == mnt) { 2309 if (mnt->mnt_parent == mnt) {
2299 spin_unlock(&vfsmount_lock); 2310 br_read_unlock(vfsmount_lock);
2300 return 0; 2311 return 0;
2301 } 2312 }
2302 if (mnt->mnt_parent == path2->mnt) 2313 if (mnt->mnt_parent == path2->mnt)
@@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2)
2306 dentry = mnt->mnt_mountpoint; 2317 dentry = mnt->mnt_mountpoint;
2307 } 2318 }
2308 res = is_subdir(dentry, path2->dentry); 2319 res = is_subdir(dentry, path2->dentry);
2309 spin_unlock(&vfsmount_lock); 2320 br_read_unlock(vfsmount_lock);
2310 return res; 2321 return res;
2311} 2322}
2312EXPORT_SYMBOL(path_is_under); 2323EXPORT_SYMBOL(path_is_under);
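The new comments in fs/dcache.c spell out the contract between the two lookup variants: __d_lookup() is lock-free and may (rarely) miss a dentry while an unrelated rename is in flight, while d_lookup() takes the rename_lock seqlock and cannot return a false negative. A sketch of the fallback pattern callers are expected to use; lookup_child() is illustrative:

#include <linux/dcache.h>

static struct dentry *lookup_child(struct dentry *parent, struct qstr *name)
{
	struct dentry *dentry;

	dentry = __d_lookup(parent, name);	/* fast, may be a false negative */
	if (!dentry)
		dentry = d_lookup(parent, name); /* slower, authoritative */
	return dentry;				/* caller must dput() on success */
}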
diff --git a/fs/exec.c b/fs/exec.c
index 05c7d6b84df7..2d9455282744 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1118,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1118 bprm->unsafe = tracehook_unsafe_exec(p); 1118 bprm->unsafe = tracehook_unsafe_exec(p);
1119 1119
1120 n_fs = 1; 1120 n_fs = 1;
1121 write_lock(&p->fs->lock); 1121 spin_lock(&p->fs->lock);
1122 rcu_read_lock(); 1122 rcu_read_lock();
1123 for (t = next_thread(p); t != p; t = next_thread(t)) { 1123 for (t = next_thread(p); t != p; t = next_thread(t)) {
1124 if (t->fs == p->fs) 1124 if (t->fs == p->fs)
@@ -1135,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1135 res = 1; 1135 res = 1;
1136 } 1136 }
1137 } 1137 }
1138 write_unlock(&p->fs->lock); 1138 spin_unlock(&p->fs->lock);
1139 1139
1140 return res; 1140 return res;
1141} 1141}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 1fa23f6ffba5..1736f2356388 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
250{ 250{
251 int i, err = 0; 251 int i, err = 0;
252 252
253 ll_rw_block(SWRITE, nr_bhs, bhs); 253 for (i = 0; i < nr_bhs; i++)
254 write_dirty_buffer(bhs[i], WRITE);
255
254 for (i = 0; i < nr_bhs; i++) { 256 for (i = 0; i < nr_bhs; i++) {
255 wait_on_buffer(bhs[i]); 257 wait_on_buffer(bhs[i]);
256 if (buffer_eopnotsupp(bhs[i])) { 258 if (buffer_eopnotsupp(bhs[i])) {
diff --git a/fs/file_table.c b/fs/file_table.c
index edecd36fed9b..a04bdd81c11c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
20#include <linux/cdev.h> 20#include <linux/cdev.h>
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/lglock.h>
23#include <linux/percpu_counter.h> 24#include <linux/percpu_counter.h>
25#include <linux/percpu.h>
24#include <linux/ima.h> 26#include <linux/ima.h>
25 27
26#include <asm/atomic.h> 28#include <asm/atomic.h>
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = {
32 .max_files = NR_FILE 34 .max_files = NR_FILE
33}; 35};
34 36
35/* public. Not pretty! */ 37DECLARE_LGLOCK(files_lglock);
36__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); 38DEFINE_LGLOCK(files_lglock);
37 39
38/* SLAB cache for file structures */ 40/* SLAB cache for file structures */
39static struct kmem_cache *filp_cachep __read_mostly; 41static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +251,7 @@ static void __fput(struct file *file)
249 cdev_put(inode->i_cdev); 251 cdev_put(inode->i_cdev);
250 fops_put(file->f_op); 252 fops_put(file->f_op);
251 put_pid(file->f_owner.pid); 253 put_pid(file->f_owner.pid);
252 file_kill(file); 254 file_sb_list_del(file);
253 if (file->f_mode & FMODE_WRITE) 255 if (file->f_mode & FMODE_WRITE)
254 drop_file_write_access(file); 256 drop_file_write_access(file);
255 file->f_path.dentry = NULL; 257 file->f_path.dentry = NULL;
@@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
328 return file; 330 return file;
329} 331}
330 332
331
332void put_filp(struct file *file) 333void put_filp(struct file *file)
333{ 334{
334 if (atomic_long_dec_and_test(&file->f_count)) { 335 if (atomic_long_dec_and_test(&file->f_count)) {
335 security_file_free(file); 336 security_file_free(file);
336 file_kill(file); 337 file_sb_list_del(file);
337 file_free(file); 338 file_free(file);
338 } 339 }
339} 340}
340 341
341void file_move(struct file *file, struct list_head *list) 342static inline int file_list_cpu(struct file *file)
342{ 343{
343 if (!list) 344#ifdef CONFIG_SMP
344 return; 345 return file->f_sb_list_cpu;
345 file_list_lock(); 346#else
346 list_move(&file->f_u.fu_list, list); 347 return smp_processor_id();
347 file_list_unlock(); 348#endif
349}
350
351/* helper for file_sb_list_add to reduce ifdefs */
352static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
353{
354 struct list_head *list;
355#ifdef CONFIG_SMP
356 int cpu;
357 cpu = smp_processor_id();
358 file->f_sb_list_cpu = cpu;
359 list = per_cpu_ptr(sb->s_files, cpu);
360#else
361 list = &sb->s_files;
362#endif
363 list_add(&file->f_u.fu_list, list);
348} 364}
349 365
350void file_kill(struct file *file) 366/**
367 * file_sb_list_add - add a file to the sb's file list
368 * @file: file to add
369 * @sb: sb to add it to
370 *
371 * Use this function to associate a file with the superblock of the inode it
372 * refers to.
373 */
374void file_sb_list_add(struct file *file, struct super_block *sb)
375{
376 lg_local_lock(files_lglock);
377 __file_sb_list_add(file, sb);
378 lg_local_unlock(files_lglock);
379}
380
381/**
382 * file_sb_list_del - remove a file from the sb's file list
383 * @file: file to remove
384 * @sb: sb to remove it from
385 *
386 * Use this function to remove a file from its superblock.
387 */
388void file_sb_list_del(struct file *file)
351{ 389{
352 if (!list_empty(&file->f_u.fu_list)) { 390 if (!list_empty(&file->f_u.fu_list)) {
353 file_list_lock(); 391 lg_local_lock_cpu(files_lglock, file_list_cpu(file));
354 list_del_init(&file->f_u.fu_list); 392 list_del_init(&file->f_u.fu_list);
355 file_list_unlock(); 393 lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
356 } 394 }
357} 395}
358 396
397#ifdef CONFIG_SMP
398
399/*
400 * These macros iterate all files on all CPUs for a given superblock.
401 * files_lglock must be held globally.
402 */
403#define do_file_list_for_each_entry(__sb, __file) \
404{ \
405 int i; \
406 for_each_possible_cpu(i) { \
407 struct list_head *list; \
408 list = per_cpu_ptr((__sb)->s_files, i); \
409 list_for_each_entry((__file), list, f_u.fu_list)
410
411#define while_file_list_for_each_entry \
412 } \
413}
414
415#else
416
417#define do_file_list_for_each_entry(__sb, __file) \
418{ \
419 struct list_head *list; \
420 list = &(sb)->s_files; \
421 list_for_each_entry((__file), list, f_u.fu_list)
422
423#define while_file_list_for_each_entry \
424}
425
426#endif
427
359int fs_may_remount_ro(struct super_block *sb) 428int fs_may_remount_ro(struct super_block *sb)
360{ 429{
361 struct file *file; 430 struct file *file;
362
363 /* Check that no files are currently opened for writing. */ 431 /* Check that no files are currently opened for writing. */
364 file_list_lock(); 432 lg_global_lock(files_lglock);
365 list_for_each_entry(file, &sb->s_files, f_u.fu_list) { 433 do_file_list_for_each_entry(sb, file) {
366 struct inode *inode = file->f_path.dentry->d_inode; 434 struct inode *inode = file->f_path.dentry->d_inode;
367 435
368 /* File with pending delete? */ 436 /* File with pending delete? */
@@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb)
372 /* Writeable file? */ 440 /* Writeable file? */
373 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) 441 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
374 goto too_bad; 442 goto too_bad;
375 } 443 } while_file_list_for_each_entry;
376 file_list_unlock(); 444 lg_global_unlock(files_lglock);
377 return 1; /* Tis' cool bro. */ 445 return 1; /* Tis' cool bro. */
378too_bad: 446too_bad:
379 file_list_unlock(); 447 lg_global_unlock(files_lglock);
380 return 0; 448 return 0;
381} 449}
382 450
@@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb)
392 struct file *f; 460 struct file *f;
393 461
394retry: 462retry:
395 file_list_lock(); 463 lg_global_lock(files_lglock);
396 list_for_each_entry(f, &sb->s_files, f_u.fu_list) { 464 do_file_list_for_each_entry(sb, f) {
397 struct vfsmount *mnt; 465 struct vfsmount *mnt;
398 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) 466 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
399 continue; 467 continue;
@@ -408,16 +476,13 @@ retry:
408 continue; 476 continue;
409 file_release_write(f); 477 file_release_write(f);
410 mnt = mntget(f->f_path.mnt); 478 mnt = mntget(f->f_path.mnt);
411 file_list_unlock(); 479 /* This can sleep, so we can't hold the spinlock. */
412 /* 480 lg_global_unlock(files_lglock);
413 * This can sleep, so we can't hold
414 * the file_list_lock() spinlock.
415 */
416 mnt_drop_write(mnt); 481 mnt_drop_write(mnt);
417 mntput(mnt); 482 mntput(mnt);
418 goto retry; 483 goto retry;
419 } 484 } while_file_list_for_each_entry;
420 file_list_unlock(); 485 lg_global_unlock(files_lglock);
421} 486}
422 487
423void __init files_init(unsigned long mempages) 488void __init files_init(unsigned long mempages)
@@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages)
437 if (files_stat.max_files < NR_FILE) 502 if (files_stat.max_files < NR_FILE)
438 files_stat.max_files = NR_FILE; 503 files_stat.max_files = NR_FILE;
439 files_defer_init(); 504 files_defer_init();
505 lg_lock_init(files_lglock);
440 percpu_counter_init(&nr_files, 0); 506 percpu_counter_init(&nr_files, 0);
441} 507}
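files_lock becomes a local-global lock: the per-superblock file list is split per CPU, the hot add/remove paths take only the local CPU's spinlock, and only the rare whole-sb scans (remount read-only, mark_files_ro) take every CPU's lock. A minimal sketch of the lglock API as used above; my_lglock and both functions are hypothetical:

#include <linux/lglock.h>
#include <linux/init.h>

DECLARE_LGLOCK(my_lglock);
DEFINE_LGLOCK(my_lglock);

static void hot_path_update(void)
{
	lg_local_lock(my_lglock);	/* this CPU's spinlock only */
	/* ... add to / remove from this CPU's list ... */
	lg_local_unlock(my_lglock);
}

static void rare_global_scan(void)
{
	lg_global_lock(my_lglock);	/* takes every CPU's spinlock */
	/* ... walk all per-CPU lists ... */
	lg_global_unlock(my_lglock);
}

static int __init my_lglock_init(void)
{
	lg_lock_init(my_lglock);
	return 0;
}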
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 1ee40eb9a2c0..ed45a9cf5f3d 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
13{ 13{
14 struct path old_root; 14 struct path old_root;
15 15
16 write_lock(&fs->lock); 16 spin_lock(&fs->lock);
17 old_root = fs->root; 17 old_root = fs->root;
18 fs->root = *path; 18 fs->root = *path;
19 path_get(path); 19 path_get(path);
20 write_unlock(&fs->lock); 20 spin_unlock(&fs->lock);
21 if (old_root.dentry) 21 if (old_root.dentry)
22 path_put(&old_root); 22 path_put(&old_root);
23} 23}
@@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
30{ 30{
31 struct path old_pwd; 31 struct path old_pwd;
32 32
33 write_lock(&fs->lock); 33 spin_lock(&fs->lock);
34 old_pwd = fs->pwd; 34 old_pwd = fs->pwd;
35 fs->pwd = *path; 35 fs->pwd = *path;
36 path_get(path); 36 path_get(path);
37 write_unlock(&fs->lock); 37 spin_unlock(&fs->lock);
38 38
39 if (old_pwd.dentry) 39 if (old_pwd.dentry)
40 path_put(&old_pwd); 40 path_put(&old_pwd);
@@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
51 task_lock(p); 51 task_lock(p);
52 fs = p->fs; 52 fs = p->fs;
53 if (fs) { 53 if (fs) {
54 write_lock(&fs->lock); 54 spin_lock(&fs->lock);
55 if (fs->root.dentry == old_root->dentry 55 if (fs->root.dentry == old_root->dentry
56 && fs->root.mnt == old_root->mnt) { 56 && fs->root.mnt == old_root->mnt) {
57 path_get(new_root); 57 path_get(new_root);
@@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
64 fs->pwd = *new_root; 64 fs->pwd = *new_root;
65 count++; 65 count++;
66 } 66 }
67 write_unlock(&fs->lock); 67 spin_unlock(&fs->lock);
68 } 68 }
69 task_unlock(p); 69 task_unlock(p);
70 } while_each_thread(g, p); 70 } while_each_thread(g, p);
@@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk)
87 if (fs) { 87 if (fs) {
88 int kill; 88 int kill;
89 task_lock(tsk); 89 task_lock(tsk);
90 write_lock(&fs->lock); 90 spin_lock(&fs->lock);
91 tsk->fs = NULL; 91 tsk->fs = NULL;
92 kill = !--fs->users; 92 kill = !--fs->users;
93 write_unlock(&fs->lock); 93 spin_unlock(&fs->lock);
94 task_unlock(tsk); 94 task_unlock(tsk);
95 if (kill) 95 if (kill)
96 free_fs_struct(fs); 96 free_fs_struct(fs);
@@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
104 if (fs) { 104 if (fs) {
105 fs->users = 1; 105 fs->users = 1;
106 fs->in_exec = 0; 106 fs->in_exec = 0;
107 rwlock_init(&fs->lock); 107 spin_lock_init(&fs->lock);
108 fs->umask = old->umask; 108 fs->umask = old->umask;
109 get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 109 get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
110 } 110 }
@@ -121,10 +121,10 @@ int unshare_fs_struct(void)
121 return -ENOMEM; 121 return -ENOMEM;
122 122
123 task_lock(current); 123 task_lock(current);
124 write_lock(&fs->lock); 124 spin_lock(&fs->lock);
125 kill = !--fs->users; 125 kill = !--fs->users;
126 current->fs = new_fs; 126 current->fs = new_fs;
127 write_unlock(&fs->lock); 127 spin_unlock(&fs->lock);
128 task_unlock(current); 128 task_unlock(current);
129 129
130 if (kill) 130 if (kill)
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask);
143/* to be mentioned only in INIT_TASK */ 143/* to be mentioned only in INIT_TASK */
144struct fs_struct init_fs = { 144struct fs_struct init_fs = {
145 .users = 1, 145 .users = 1,
146 .lock = __RW_LOCK_UNLOCKED(init_fs.lock), 146 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
147 .umask = 0022, 147 .umask = 0022,
148}; 148};
149 149
@@ -156,14 +156,14 @@ void daemonize_fs_struct(void)
156 156
157 task_lock(current); 157 task_lock(current);
158 158
159 write_lock(&init_fs.lock); 159 spin_lock(&init_fs.lock);
160 init_fs.users++; 160 init_fs.users++;
161 write_unlock(&init_fs.lock); 161 spin_unlock(&init_fs.lock);
162 162
163 write_lock(&fs->lock); 163 spin_lock(&fs->lock);
164 current->fs = &init_fs; 164 current->fs = &init_fs;
165 kill = !--fs->users; 165 kill = !--fs->users;
166 write_unlock(&fs->lock); 166 spin_unlock(&fs->lock);
167 167
168 task_unlock(current); 168 task_unlock(current);
169 if (kill) 169 if (kill)
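With fs_struct.lock converted from an rwlock to a spinlock, readers and writers both take spin_lock(); the critical sections are short enough that rwlock reader concurrency was not worth its extra cost. A sketch of a reader after the conversion; sample_get_root() is illustrative:

#include <linux/fs_struct.h>
#include <linux/path.h>
#include <linux/spinlock.h>

static void sample_get_root(struct fs_struct *fs, struct path *root)
{
	spin_lock(&fs->lock);
	*root = fs->root;
	path_get(root);		/* grab references before dropping the lock */
	spin_unlock(&fs->lock);
}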
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 99800e564157..6bc9e3a5a693 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
94 if (error < 0) 94 if (error < 0)
95 goto failed; 95 goto failed;
96 inode->i_mode = mode; 96 inode->i_mode = mode;
97 inode->i_ctime = CURRENT_TIME;
97 if (error == 0) { 98 if (error == 0) {
98 posix_acl_release(acl); 99 posix_acl_release(acl);
99 acl = NULL; 100 acl = NULL;
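The one-line generic_acl fix makes setfacl behave like chmod(): when the ACL update rewrites i_mode, i_ctime is bumped as well. The rule, as an illustrative sketch (apply_new_mode() is not part of the patch):

#include <linux/fs.h>
#include <linux/time.h>

static void apply_new_mode(struct inode *inode, umode_t mode)
{
	inode->i_mode = mode;
	inode->i_ctime = CURRENT_TIME;	/* permission change updates ctime */
	mark_inode_dirty(inode);
}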
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index dd1e55535a4e..f7dc9b5f9ef8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name)
104 __putname(name); 104 __putname(name);
105 return NULL; 105 return NULL;
106 } 106 }
107 strncpy(name, root, PATH_MAX); 107 strlcpy(name, root, PATH_MAX);
108 if (len > p - name) { 108 if (len > p - name) {
109 __putname(name); 109 __putname(name);
110 return NULL; 110 return NULL;
@@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
876 char *path = dentry_name(dentry); 876 char *path = dentry_name(dentry);
877 int err = -ENOMEM; 877 int err = -ENOMEM;
878 if (path) { 878 if (path) {
879 int err = hostfs_do_readlink(path, link, PATH_MAX); 879 err = hostfs_do_readlink(path, link, PATH_MAX);
880 if (err == PATH_MAX) 880 if (err == PATH_MAX)
881 err = -E2BIG; 881 err = -E2BIG;
882 __putname(path); 882 __putname(path);
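Two small hostfs fixes: follow_link no longer shadows err with a second declaration, and the root-path copy uses strlcpy(), which always NUL-terminates and reports the attempted length, unlike strncpy(). A sketch of the strlcpy() truncation-check idiom; copy_root_name() is illustrative:

#include <linux/string.h>
#include <linux/types.h>
#include <linux/errno.h>

static int copy_root_name(char *dst, const char *root, size_t size)
{
	/* strlcpy() returns strlen(root); >= size means it was truncated */
	if (strlcpy(dst, root, size) >= size)
		return -ENAMETOOLONG;
	return 0;
}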
diff --git a/fs/internal.h b/fs/internal.h
index 6b706bc60a66..a6910e91cee8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,6 +9,8 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/lglock.h>
13
12struct super_block; 14struct super_block;
13struct linux_binprm; 15struct linux_binprm;
14struct path; 16struct path;
@@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
70 72
71extern void __init mnt_init(void); 73extern void __init mnt_init(void);
72 74
73extern spinlock_t vfsmount_lock; 75DECLARE_BRLOCK(vfsmount_lock);
76
74 77
75/* 78/*
76 * fs_struct.c 79 * fs_struct.c
@@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
80/* 83/*
81 * file_table.c 84 * file_table.c
82 */ 85 */
86extern void file_sb_list_add(struct file *f, struct super_block *sb);
87extern void file_sb_list_del(struct file *f);
83extern void mark_files_ro(struct super_block *); 88extern void mark_files_ro(struct super_block *);
84extern struct file *get_empty_filp(void); 89extern struct file *get_empty_filp(void);
85 90
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index b0435dd0654d..05a38b9c4c0e 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
254{ 254{
255 int i; 255 int i;
256 256
257 ll_rw_block(SWRITE, *batch_count, bhs); 257 for (i = 0; i < *batch_count; i++)
258 write_dirty_buffer(bhs[i], WRITE);
259
258 for (i = 0; i < *batch_count; i++) { 260 for (i = 0; i < *batch_count; i++) {
259 struct buffer_head *bh = bhs[i]; 261 struct buffer_head *bh = bhs[i];
260 clear_buffer_jwrite(bh); 262 clear_buffer_jwrite(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 28a9ddaa0c49..95d8c11c929e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal,
119 struct buffer_head *bh; 119 struct buffer_head *bh;
120 journal_header_t *header; 120 journal_header_t *header;
121 int ret; 121 int ret;
122 int barrier_done = 0;
123 122
124 if (is_journal_aborted(journal)) 123 if (is_journal_aborted(journal))
125 return 0; 124 return 0;
@@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal,
137 136
138 JBUFFER_TRACE(descriptor, "write commit block"); 137 JBUFFER_TRACE(descriptor, "write commit block");
139 set_buffer_dirty(bh); 138 set_buffer_dirty(bh);
139
140 if (journal->j_flags & JFS_BARRIER) { 140 if (journal->j_flags & JFS_BARRIER) {
141 set_buffer_ordered(bh); 141 ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
142 barrier_done = 1;
143 }
144 ret = sync_dirty_buffer(bh);
145 if (barrier_done)
146 clear_buffer_ordered(bh);
147 /* is it possible for another commit to fail at roughly
148 * the same time as this one? If so, we don't want to
149 * trust the barrier flag in the super, but instead want
150 * to remember if we sent a barrier request
151 */
152 if (ret == -EOPNOTSUPP && barrier_done) {
153 char b[BDEVNAME_SIZE];
154 142
155 printk(KERN_WARNING 143 /*
156 "JBD: barrier-based sync failed on %s - " 144 * Is it possible for another commit to fail at roughly
157 "disabling barriers\n", 145 * the same time as this one? If so, we don't want to
158 bdevname(journal->j_dev, b)); 146 * trust the barrier flag in the super, but instead want
159 spin_lock(&journal->j_state_lock); 147 * to remember if we sent a barrier request
160 journal->j_flags &= ~JFS_BARRIER; 148 */
161 spin_unlock(&journal->j_state_lock); 149 if (ret == -EOPNOTSUPP) {
150 char b[BDEVNAME_SIZE];
162 151
163 /* And try again, without the barrier */ 152 printk(KERN_WARNING
164 set_buffer_uptodate(bh); 153 "JBD: barrier-based sync failed on %s - "
165 set_buffer_dirty(bh); 154 "disabling barriers\n",
155 bdevname(journal->j_dev, b));
156 spin_lock(&journal->j_state_lock);
157 journal->j_flags &= ~JFS_BARRIER;
158 spin_unlock(&journal->j_state_lock);
159
160 /* And try again, without the barrier */
161 set_buffer_uptodate(bh);
162 set_buffer_dirty(bh);
163 ret = sync_dirty_buffer(bh);
164 }
165 } else {
166 ret = sync_dirty_buffer(bh); 166 ret = sync_dirty_buffer(bh);
167 } 167 }
168
168 put_bh(bh); /* One for getblk() */ 169 put_bh(bh); /* One for getblk() */
169 journal_put_journal_head(descriptor); 170 journal_put_journal_head(descriptor);
170 171
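With the BH_Ordered flag gone, the commit record asks for a barrier explicitly by passing WRITE_BARRIER to __sync_dirty_buffer(), and falls back to a plain synchronous write if the device returns -EOPNOTSUPP. A condensed sketch of that retry logic; write_commit_block() is illustrative and omits the journal-flag bookkeeping of the real code:

#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/buffer_head.h>

static int write_commit_block(struct buffer_head *bh, int use_barrier)
{
	int ret;

	set_buffer_dirty(bh);
	if (!use_barrier)
		return sync_dirty_buffer(bh);

	ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
	if (ret == -EOPNOTSUPP) {
		/* device rejected the barrier: retry as an ordinary sync write */
		set_buffer_uptodate(bh);
		set_buffer_dirty(bh);
		ret = sync_dirty_buffer(bh);
	}
	return ret;
}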
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f19ce94693d8..2c4b1f109da9 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait)
1024 if (wait) 1024 if (wait)
1025 sync_dirty_buffer(bh); 1025 sync_dirty_buffer(bh);
1026 else 1026 else
1027 ll_rw_block(SWRITE, 1, &bh); 1027 write_dirty_buffer(bh, WRITE);
1028 1028
1029out: 1029out:
1030 /* If we have just flushed the log (by marking s_start==0), then 1030 /* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad717328343a..d29018307e2e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
617 set_buffer_jwrite(bh); 617 set_buffer_jwrite(bh);
618 BUFFER_TRACE(bh, "write"); 618 BUFFER_TRACE(bh, "write");
619 set_buffer_dirty(bh); 619 set_buffer_dirty(bh);
620 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); 620 write_dirty_buffer(bh, write_op);
621} 621}
622#endif 622#endif
623 623
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c23a0f4e8a3..5247e7ffdcb4 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count)
255{ 255{
256 int i; 256 int i;
257 257
258 ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); 258 for (i = 0; i < *batch_count; i++)
259 write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
260
259 for (i = 0; i < *batch_count; i++) { 261 for (i = 0; i < *batch_count; i++) {
260 struct buffer_head *bh = journal->j_chkpt_bhs[i]; 262 struct buffer_head *bh = journal->j_chkpt_bhs[i];
261 clear_buffer_jwrite(bh); 263 clear_buffer_jwrite(bh);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f52e5e8049f1..7c068c189d80 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal,
101 struct commit_header *tmp; 101 struct commit_header *tmp;
102 struct buffer_head *bh; 102 struct buffer_head *bh;
103 int ret; 103 int ret;
104 int barrier_done = 0;
105 struct timespec now = current_kernel_time(); 104 struct timespec now = current_kernel_time();
106 105
107 if (is_journal_aborted(journal)) 106 if (is_journal_aborted(journal))
@@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal,
136 if (journal->j_flags & JBD2_BARRIER && 135 if (journal->j_flags & JBD2_BARRIER &&
137 !JBD2_HAS_INCOMPAT_FEATURE(journal, 136 !JBD2_HAS_INCOMPAT_FEATURE(journal,
138 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 137 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
139 set_buffer_ordered(bh); 138 ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
140 barrier_done = 1; 139 if (ret == -EOPNOTSUPP) {
141 } 140 printk(KERN_WARNING
142 ret = submit_bh(WRITE_SYNC_PLUG, bh); 141 "JBD2: Disabling barriers on %s, "
143 if (barrier_done) 142 "not supported by device\n", journal->j_devname);
144 clear_buffer_ordered(bh); 143 write_lock(&journal->j_state_lock);
145 144 journal->j_flags &= ~JBD2_BARRIER;
146 /* is it possible for another commit to fail at roughly 145 write_unlock(&journal->j_state_lock);
147 * the same time as this one? If so, we don't want to
148 * trust the barrier flag in the super, but instead want
149 * to remember if we sent a barrier request
150 */
151 if (ret == -EOPNOTSUPP && barrier_done) {
152 printk(KERN_WARNING
153 "JBD2: Disabling barriers on %s, "
154 "not supported by device\n", journal->j_devname);
155 write_lock(&journal->j_state_lock);
156 journal->j_flags &= ~JBD2_BARRIER;
157 write_unlock(&journal->j_state_lock);
158 146
159 /* And try again, without the barrier */ 147 /* And try again, without the barrier */
160 lock_buffer(bh); 148 lock_buffer(bh);
161 set_buffer_uptodate(bh); 149 set_buffer_uptodate(bh);
162 clear_buffer_dirty(bh); 150 clear_buffer_dirty(bh);
151 ret = submit_bh(WRITE_SYNC_PLUG, bh);
152 }
153 } else {
163 ret = submit_bh(WRITE_SYNC_PLUG, bh); 154 ret = submit_bh(WRITE_SYNC_PLUG, bh);
164 } 155 }
165 *cbh = bh; 156 *cbh = bh;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ad5866aaf0f9..0e8014ea6b94 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1124 set_buffer_uptodate(bh); 1124 set_buffer_uptodate(bh);
1125 } 1125 }
1126 } else 1126 } else
1127 ll_rw_block(SWRITE, 1, &bh); 1127 write_dirty_buffer(bh, WRITE);
1128 1128
1129out: 1129out:
1130 /* If we have just flushed the log (by marking s_start==0), then 1130 /* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index a360b06af2e3..9ad321fd63fd 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
625 set_buffer_jwrite(bh); 625 set_buffer_jwrite(bh);
626 BUFFER_TRACE(bh, "write"); 626 BUFFER_TRACE(bh, "write");
627 set_buffer_dirty(bh); 627 set_buffer_dirty(bh);
628 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); 628 write_dirty_buffer(bh, write_op);
629} 629}
630#endif 630#endif
631 631
diff --git a/fs/mbcache.c b/fs/mbcache.c
index cf4e6cdfd15b..93444747237b 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -80,6 +80,7 @@ struct mb_cache {
80 struct list_head c_cache_list; 80 struct list_head c_cache_list;
81 const char *c_name; 81 const char *c_name;
82 atomic_t c_entry_count; 82 atomic_t c_entry_count;
83 int c_max_entries;
83 int c_bucket_bits; 84 int c_bucket_bits;
84 struct kmem_cache *c_entry_cache; 85 struct kmem_cache *c_entry_cache;
85 struct list_head *c_block_hash; 86 struct list_head *c_block_hash;
@@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits)
243 if (!cache->c_entry_cache) 244 if (!cache->c_entry_cache)
244 goto fail2; 245 goto fail2;
245 246
247 /*
248 * Set an upper limit on the number of cache entries so that the hash
249 * chains won't grow too long.
250 */
251 cache->c_max_entries = bucket_count << 4;
252
246 spin_lock(&mb_cache_spinlock); 253 spin_lock(&mb_cache_spinlock);
247 list_add(&cache->c_cache_list, &mb_cache_list); 254 list_add(&cache->c_cache_list, &mb_cache_list);
248 spin_unlock(&mb_cache_spinlock); 255 spin_unlock(&mb_cache_spinlock);
@@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache)
333 kfree(cache); 340 kfree(cache);
334} 341}
335 342
336
337/* 343/*
338 * mb_cache_entry_alloc() 344 * mb_cache_entry_alloc()
339 * 345 *
@@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache)
345struct mb_cache_entry * 351struct mb_cache_entry *
346mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) 352mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
347{ 353{
348 struct mb_cache_entry *ce; 354 struct mb_cache_entry *ce = NULL;
349 355
350 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); 356 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
351 if (ce) { 357 spin_lock(&mb_cache_spinlock);
358 if (!list_empty(&mb_cache_lru_list)) {
359 ce = list_entry(mb_cache_lru_list.next,
360 struct mb_cache_entry, e_lru_list);
361 list_del_init(&ce->e_lru_list);
362 __mb_cache_entry_unhash(ce);
363 }
364 spin_unlock(&mb_cache_spinlock);
365 }
366 if (!ce) {
367 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
368 if (!ce)
369 return NULL;
352 atomic_inc(&cache->c_entry_count); 370 atomic_inc(&cache->c_entry_count);
353 INIT_LIST_HEAD(&ce->e_lru_list); 371 INIT_LIST_HEAD(&ce->e_lru_list);
354 INIT_LIST_HEAD(&ce->e_block_list); 372 INIT_LIST_HEAD(&ce->e_block_list);
355 ce->e_cache = cache; 373 ce->e_cache = cache;
356 ce->e_used = 1 + MB_CACHE_WRITER;
357 ce->e_queued = 0; 374 ce->e_queued = 0;
358 } 375 }
376 ce->e_used = 1 + MB_CACHE_WRITER;
359 return ce; 377 return ce;
360} 378}
361 379
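mbcache now caps itself at bucket_count << 4 entries; past that point mb_cache_entry_alloc() recycles the oldest entry from the global LRU list instead of letting the hash chains grow. For example, a cache created with bucket_bits = 6 has 64 buckets, a limit of 1024 entries and an average chain length of about 16. Usage is unchanged; the values below are only illustrative:

#include <linux/mbcache.h>
#include <linux/gfp.h>
#include <linux/errno.h>

static struct mb_cache *demo_cache;

static int demo_setup(void)
{
	demo_cache = mb_cache_create("demo_cache", 6);	/* 64 buckets, max 1024 entries */
	return demo_cache ? 0 : -ENOMEM;
}

static struct mb_cache_entry *demo_get_entry(void)
{
	/* may hand back a recycled LRU entry once the cap is reached */
	return mb_cache_entry_alloc(demo_cache, GFP_KERNEL);
}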
diff --git a/fs/namei.c b/fs/namei.c
index 17ea76bf2fbe..24896e833565 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -595,15 +595,16 @@ int follow_up(struct path *path)
595{ 595{
596 struct vfsmount *parent; 596 struct vfsmount *parent;
597 struct dentry *mountpoint; 597 struct dentry *mountpoint;
598 spin_lock(&vfsmount_lock); 598
599 br_read_lock(vfsmount_lock);
599 parent = path->mnt->mnt_parent; 600 parent = path->mnt->mnt_parent;
600 if (parent == path->mnt) { 601 if (parent == path->mnt) {
601 spin_unlock(&vfsmount_lock); 602 br_read_unlock(vfsmount_lock);
602 return 0; 603 return 0;
603 } 604 }
604 mntget(parent); 605 mntget(parent);
605 mountpoint = dget(path->mnt->mnt_mountpoint); 606 mountpoint = dget(path->mnt->mnt_mountpoint);
606 spin_unlock(&vfsmount_lock); 607 br_read_unlock(vfsmount_lock);
607 dput(path->dentry); 608 dput(path->dentry);
608 path->dentry = mountpoint; 609 path->dentry = mountpoint;
609 mntput(path->mnt); 610 mntput(path->mnt);
@@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
686} 687}
687 688
688/* 689/*
690 * Allocate a dentry with name and parent, and perform a parent
691 * directory ->lookup on it. Returns the new dentry, or ERR_PTR
692 * on error. parent->d_inode->i_mutex must be held. d_lookup must
693 * have verified that no child exists while under i_mutex.
694 */
695static struct dentry *d_alloc_and_lookup(struct dentry *parent,
696 struct qstr *name, struct nameidata *nd)
697{
698 struct inode *inode = parent->d_inode;
699 struct dentry *dentry;
700 struct dentry *old;
701
702 /* Don't create child dentry for a dead directory. */
703 if (unlikely(IS_DEADDIR(inode)))
704 return ERR_PTR(-ENOENT);
705
706 dentry = d_alloc(parent, name);
707 if (unlikely(!dentry))
708 return ERR_PTR(-ENOMEM);
709
710 old = inode->i_op->lookup(inode, dentry, nd);
711 if (unlikely(old)) {
712 dput(dentry);
713 dentry = old;
714 }
715 return dentry;
716}
717
718/*
689 * It's more convoluted than I'd like it to be, but... it's still fairly 719 * It's more convoluted than I'd like it to be, but... it's still fairly
690 * small and for now I'd prefer to have fast path as straight as possible. 720 * small and for now I'd prefer to have fast path as straight as possible.
691 * It _is_ time-critical. 721 * It _is_ time-critical.
@@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
706 return err; 736 return err;
707 } 737 }
708 738
739 /*
740 * Rename seqlock is not required here because in the off chance
741 * of a false negative due to a concurrent rename, we're going to
742 * do the non-racy lookup, below.
743 */
709 dentry = __d_lookup(nd->path.dentry, name); 744 dentry = __d_lookup(nd->path.dentry, name);
710 if (!dentry) 745 if (!dentry)
711 goto need_lookup; 746 goto need_lookup;
747found:
712 if (dentry->d_op && dentry->d_op->d_revalidate) 748 if (dentry->d_op && dentry->d_op->d_revalidate)
713 goto need_revalidate; 749 goto need_revalidate;
714done: 750done:
@@ -724,56 +760,28 @@ need_lookup:
724 mutex_lock(&dir->i_mutex); 760 mutex_lock(&dir->i_mutex);
725 /* 761 /*
726 * First re-do the cached lookup just in case it was created 762 * First re-do the cached lookup just in case it was created
727 * while we waited for the directory semaphore.. 763 * while we waited for the directory semaphore, or the first
764 * lookup failed due to an unrelated rename.
728 * 765 *
729 * FIXME! This could use version numbering or similar to 766 * This could use version numbering or similar to avoid unnecessary
730 * avoid unnecessary cache lookups. 767 * cache lookups, but then we'd have to do the first lookup in the
731 * 768 * non-racy way. However in the common case here, everything should
732 * The "dcache_lock" is purely to protect the RCU list walker 769 * be hot in cache, so would it be a big win?
733 * from concurrent renames at this point (we mustn't get false
734 * negatives from the RCU list walk here, unlike the optimistic
735 * fast walk).
736 *
737 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
738 */ 770 */
739 dentry = d_lookup(parent, name); 771 dentry = d_lookup(parent, name);
740 if (!dentry) { 772 if (likely(!dentry)) {
741 struct dentry *new; 773 dentry = d_alloc_and_lookup(parent, name, nd);
742
743 /* Don't create child dentry for a dead directory. */
744 dentry = ERR_PTR(-ENOENT);
745 if (IS_DEADDIR(dir))
746 goto out_unlock;
747
748 new = d_alloc(parent, name);
749 dentry = ERR_PTR(-ENOMEM);
750 if (new) {
751 dentry = dir->i_op->lookup(dir, new, nd);
752 if (dentry)
753 dput(new);
754 else
755 dentry = new;
756 }
757out_unlock:
758 mutex_unlock(&dir->i_mutex); 774 mutex_unlock(&dir->i_mutex);
759 if (IS_ERR(dentry)) 775 if (IS_ERR(dentry))
760 goto fail; 776 goto fail;
761 goto done; 777 goto done;
762 } 778 }
763
764 /* 779 /*
765 * Uhhuh! Nasty case: the cache was re-populated while 780 * Uhhuh! Nasty case: the cache was re-populated while
766 * we waited on the semaphore. Need to revalidate. 781 * we waited on the semaphore. Need to revalidate.
767 */ 782 */
768 mutex_unlock(&dir->i_mutex); 783 mutex_unlock(&dir->i_mutex);
769 if (dentry->d_op && dentry->d_op->d_revalidate) { 784 goto found;
770 dentry = do_revalidate(dentry, nd);
771 if (!dentry)
772 dentry = ERR_PTR(-ENOENT);
773 }
774 if (IS_ERR(dentry))
775 goto fail;
776 goto done;
777 785
778need_revalidate: 786need_revalidate:
779 dentry = do_revalidate(dentry, nd); 787 dentry = do_revalidate(dentry, nd);
@@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name,
1130 goto out; 1138 goto out;
1131 } 1139 }
1132 1140
1133 dentry = __d_lookup(base, name); 1141 /*
1134 1142 * Don't bother with __d_lookup: callers are for creat as
1135 /* lockess __d_lookup may fail due to concurrent d_move() 1143 * well as unlink, so a lot of the time it would cost
1136 * in some unrelated directory, so try with d_lookup 1144 * a double lookup.
1137 */ 1145 */
1138 if (!dentry) 1146 dentry = d_lookup(base, name);
1139 dentry = d_lookup(base, name);
1140 1147
1141 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 1148 if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
1142 dentry = do_revalidate(dentry, nd); 1149 dentry = do_revalidate(dentry, nd);
1143 1150
1144 if (!dentry) { 1151 if (!dentry)
1145 struct dentry *new; 1152 dentry = d_alloc_and_lookup(base, name, nd);
1146
1147 /* Don't create child dentry for a dead directory. */
1148 dentry = ERR_PTR(-ENOENT);
1149 if (IS_DEADDIR(inode))
1150 goto out;
1151
1152 new = d_alloc(base, name);
1153 dentry = ERR_PTR(-ENOMEM);
1154 if (!new)
1155 goto out;
1156 dentry = inode->i_op->lookup(inode, new, nd);
1157 if (!dentry)
1158 dentry = new;
1159 else
1160 dput(new);
1161 }
1162out: 1153out:
1163 return dentry; 1154 return dentry;
1164} 1155}
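The namei.c changes consolidate the d_alloc() + ->lookup() slow path into d_alloc_and_lookup() and drop the redundant __d_lookup() from __lookup_hash(). The shared pattern, as a sketch; slow_lookup() is illustrative, and since d_alloc_and_lookup() is static to fs/namei.c this only makes sense inside that file:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/mutex.h>
#include <linux/err.h>

static struct dentry *slow_lookup(struct dentry *parent, struct qstr *name,
				  struct nameidata *nd)
{
	struct dentry *dentry;

	mutex_lock(&parent->d_inode->i_mutex);
	/* retry under i_mutex: d_lookup() cannot return a rename false negative */
	dentry = d_lookup(parent, name);
	if (!dentry)
		dentry = d_alloc_and_lookup(parent, name, nd);	/* new helper */
	mutex_unlock(&parent->d_inode->i_mutex);
	return dentry;		/* may be an ERR_PTR() value */
}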
diff --git a/fs/namespace.c b/fs/namespace.c
index 2e10cb19c5b0..de402eb6eafb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,6 +11,8 @@
11#include <linux/syscalls.h> 11#include <linux/syscalls.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/spinlock.h>
15#include <linux/percpu.h>
14#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
15#include <linux/init.h> 17#include <linux/init.h>
16#include <linux/kernel.h> 18#include <linux/kernel.h>
@@ -38,12 +40,10 @@
38#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) 40#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
39#define HASH_SIZE (1UL << HASH_SHIFT) 41#define HASH_SIZE (1UL << HASH_SHIFT)
40 42
41/* spinlock for vfsmount related operations, inplace of dcache_lock */
42__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
43
44static int event; 43static int event;
45static DEFINE_IDA(mnt_id_ida); 44static DEFINE_IDA(mnt_id_ida);
46static DEFINE_IDA(mnt_group_ida); 45static DEFINE_IDA(mnt_group_ida);
46static DEFINE_SPINLOCK(mnt_id_lock);
47static int mnt_id_start = 0; 47static int mnt_id_start = 0;
48static int mnt_group_start = 1; 48static int mnt_group_start = 1;
49 49
@@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem;
55struct kobject *fs_kobj; 55struct kobject *fs_kobj;
56EXPORT_SYMBOL_GPL(fs_kobj); 56EXPORT_SYMBOL_GPL(fs_kobj);
57 57
58/*
59 * vfsmount lock may be taken for read to prevent changes to the
60 * vfsmount hash, ie. during mountpoint lookups or walking back
61 * up the tree.
62 *
63 * It should be taken for write in all cases where the vfsmount
64 * tree or hash is modified or when a vfsmount structure is modified.
65 */
66DEFINE_BRLOCK(vfsmount_lock);
67
58static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) 68static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
59{ 69{
60 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); 70 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
65 75
66#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) 76#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
67 77
68/* allocation is serialized by namespace_sem */ 78/*
79 * allocation is serialized by namespace_sem, but we need the spinlock to
80 * serialize with freeing.
81 */
69static int mnt_alloc_id(struct vfsmount *mnt) 82static int mnt_alloc_id(struct vfsmount *mnt)
70{ 83{
71 int res; 84 int res;
72 85
73retry: 86retry:
74 ida_pre_get(&mnt_id_ida, GFP_KERNEL); 87 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
75 spin_lock(&vfsmount_lock); 88 spin_lock(&mnt_id_lock);
76 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); 89 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
77 if (!res) 90 if (!res)
78 mnt_id_start = mnt->mnt_id + 1; 91 mnt_id_start = mnt->mnt_id + 1;
79 spin_unlock(&vfsmount_lock); 92 spin_unlock(&mnt_id_lock);
80 if (res == -EAGAIN) 93 if (res == -EAGAIN)
81 goto retry; 94 goto retry;
82 95
@@ -86,11 +99,11 @@ retry:
86static void mnt_free_id(struct vfsmount *mnt) 99static void mnt_free_id(struct vfsmount *mnt)
87{ 100{
88 int id = mnt->mnt_id; 101 int id = mnt->mnt_id;
89 spin_lock(&vfsmount_lock); 102 spin_lock(&mnt_id_lock);
90 ida_remove(&mnt_id_ida, id); 103 ida_remove(&mnt_id_ida, id);
91 if (mnt_id_start > id) 104 if (mnt_id_start > id)
92 mnt_id_start = id; 105 mnt_id_start = id;
93 spin_unlock(&vfsmount_lock); 106 spin_unlock(&mnt_id_lock);
94} 107}
95 108
96/* 109/*
@@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
348{ 361{
349 int ret = 0; 362 int ret = 0;
350 363
351 spin_lock(&vfsmount_lock); 364 br_write_lock(vfsmount_lock);
352 mnt->mnt_flags |= MNT_WRITE_HOLD; 365 mnt->mnt_flags |= MNT_WRITE_HOLD;
353 /* 366 /*
354 * After storing MNT_WRITE_HOLD, we'll read the counters. This store 367 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
382 */ 395 */
383 smp_wmb(); 396 smp_wmb();
384 mnt->mnt_flags &= ~MNT_WRITE_HOLD; 397 mnt->mnt_flags &= ~MNT_WRITE_HOLD;
385 spin_unlock(&vfsmount_lock); 398 br_write_unlock(vfsmount_lock);
386 return ret; 399 return ret;
387} 400}
388 401
389static void __mnt_unmake_readonly(struct vfsmount *mnt) 402static void __mnt_unmake_readonly(struct vfsmount *mnt)
390{ 403{
391 spin_lock(&vfsmount_lock); 404 br_write_lock(vfsmount_lock);
392 mnt->mnt_flags &= ~MNT_READONLY; 405 mnt->mnt_flags &= ~MNT_READONLY;
393 spin_unlock(&vfsmount_lock); 406 br_write_unlock(vfsmount_lock);
394} 407}
395 408
396void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 409void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt)
414/* 427/*
415 * find the first or last mount at @dentry on vfsmount @mnt depending on 428 * find the first or last mount at @dentry on vfsmount @mnt depending on
416 * @dir. If @dir is set return the first mount else return the last mount. 429 * @dir. If @dir is set return the first mount else return the last mount.
430 * vfsmount_lock must be held for read or write.
417 */ 431 */
418struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, 432struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
419 int dir) 433 int dir)
@@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
443struct vfsmount *lookup_mnt(struct path *path) 457struct vfsmount *lookup_mnt(struct path *path)
444{ 458{
445 struct vfsmount *child_mnt; 459 struct vfsmount *child_mnt;
446 spin_lock(&vfsmount_lock); 460
461 br_read_lock(vfsmount_lock);
447 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) 462 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
448 mntget(child_mnt); 463 mntget(child_mnt);
449 spin_unlock(&vfsmount_lock); 464 br_read_unlock(vfsmount_lock);
450 return child_mnt; 465 return child_mnt;
451} 466}
452 467
@@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt)
455 return mnt->mnt_ns == current->nsproxy->mnt_ns; 470 return mnt->mnt_ns == current->nsproxy->mnt_ns;
456} 471}
457 472
473/*
474 * vfsmount lock must be held for write
475 */
458static void touch_mnt_namespace(struct mnt_namespace *ns) 476static void touch_mnt_namespace(struct mnt_namespace *ns)
459{ 477{
460 if (ns) { 478 if (ns) {
@@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns)
463 } 481 }
464} 482}
465 483
484/*
485 * vfsmount lock must be held for write
486 */
466static void __touch_mnt_namespace(struct mnt_namespace *ns) 487static void __touch_mnt_namespace(struct mnt_namespace *ns)
467{ 488{
468 if (ns && ns->event != event) { 489 if (ns && ns->event != event) {
@@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
471 } 492 }
472} 493}
473 494
495/*
496 * vfsmount lock must be held for write
497 */
474static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 498static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
475{ 499{
476 old_path->dentry = mnt->mnt_mountpoint; 500 old_path->dentry = mnt->mnt_mountpoint;
@@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
482 old_path->dentry->d_mounted--; 506 old_path->dentry->d_mounted--;
483} 507}
484 508
509/*
510 * vfsmount lock must be held for write
511 */
485void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 512void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
486 struct vfsmount *child_mnt) 513 struct vfsmount *child_mnt)
487{ 514{
@@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
490 dentry->d_mounted++; 517 dentry->d_mounted++;
491} 518}
492 519
520/*
521 * vfsmount lock must be held for write
522 */
493static void attach_mnt(struct vfsmount *mnt, struct path *path) 523static void attach_mnt(struct vfsmount *mnt, struct path *path)
494{ 524{
495 mnt_set_mountpoint(path->mnt, path->dentry, mnt); 525 mnt_set_mountpoint(path->mnt, path->dentry, mnt);
@@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
499} 529}
500 530
501/* 531/*
502 * the caller must hold vfsmount_lock 532 * vfsmount lock must be held for write
503 */ 533 */
504static void commit_tree(struct vfsmount *mnt) 534static void commit_tree(struct vfsmount *mnt)
505{ 535{
@@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt)
623void mntput_no_expire(struct vfsmount *mnt) 653void mntput_no_expire(struct vfsmount *mnt)
624{ 654{
625repeat: 655repeat:
626 if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { 656 if (atomic_add_unless(&mnt->mnt_count, -1, 1))
627 if (likely(!mnt->mnt_pinned)) { 657 return;
628 spin_unlock(&vfsmount_lock); 658 br_write_lock(vfsmount_lock);
629 __mntput(mnt); 659 if (!atomic_dec_and_test(&mnt->mnt_count)) {
630 return; 660 br_write_unlock(vfsmount_lock);
631 } 661 return;
632 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); 662 }
633 mnt->mnt_pinned = 0; 663 if (likely(!mnt->mnt_pinned)) {
634 spin_unlock(&vfsmount_lock); 664 br_write_unlock(vfsmount_lock);
635 acct_auto_close_mnt(mnt); 665 __mntput(mnt);
636 goto repeat; 666 return;
637 } 667 }
668 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
669 mnt->mnt_pinned = 0;
670 br_write_unlock(vfsmount_lock);
671 acct_auto_close_mnt(mnt);
672 goto repeat;
638} 673}
639
640EXPORT_SYMBOL(mntput_no_expire); 674EXPORT_SYMBOL(mntput_no_expire);
641 675
642void mnt_pin(struct vfsmount *mnt) 676void mnt_pin(struct vfsmount *mnt)
643{ 677{
644 spin_lock(&vfsmount_lock); 678 br_write_lock(vfsmount_lock);
645 mnt->mnt_pinned++; 679 mnt->mnt_pinned++;
646 spin_unlock(&vfsmount_lock); 680 br_write_unlock(vfsmount_lock);
647} 681}
648 682
649EXPORT_SYMBOL(mnt_pin); 683EXPORT_SYMBOL(mnt_pin);
650 684
651void mnt_unpin(struct vfsmount *mnt) 685void mnt_unpin(struct vfsmount *mnt)
652{ 686{
653 spin_lock(&vfsmount_lock); 687 br_write_lock(vfsmount_lock);
654 if (mnt->mnt_pinned) { 688 if (mnt->mnt_pinned) {
655 atomic_inc(&mnt->mnt_count); 689 atomic_inc(&mnt->mnt_count);
656 mnt->mnt_pinned--; 690 mnt->mnt_pinned--;
657 } 691 }
658 spin_unlock(&vfsmount_lock); 692 br_write_unlock(vfsmount_lock);
659} 693}
660 694
661EXPORT_SYMBOL(mnt_unpin); 695EXPORT_SYMBOL(mnt_unpin);
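
The rewritten mntput_no_expire() above gains a lock-free fast path: as long as the
reference count cannot reach its final value, the drop is done with atomic_add_unless()
and the brlock is never taken; only the last put falls through to br_write_lock(). A
minimal sketch of the pattern, ignoring the mnt_pinned handling (refcount_put() and
do_cleanup() are hypothetical names, not kernel functions):

/* Sketch only; the real code operates on mnt->mnt_count. */
static void refcount_put(atomic_t *count)
{
        /* Fast path: drop a reference unless it would be the last one. */
        if (atomic_add_unless(count, -1, 1))
                return;

        /* Slow path: serialise the final drop against readers and writers. */
        br_write_lock(vfsmount_lock);
        if (!atomic_dec_and_test(count)) {
                br_write_unlock(vfsmount_lock);
                return;
        }
        br_write_unlock(vfsmount_lock);
        do_cleanup();   /* last reference is gone */
}
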
@@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p)
746 struct mnt_namespace *ns = p->ns; 780 struct mnt_namespace *ns = p->ns;
747 int res = 0; 781 int res = 0;
748 782
749 spin_lock(&vfsmount_lock); 783 br_read_lock(vfsmount_lock);
750 if (p->event != ns->event) { 784 if (p->event != ns->event) {
751 p->event = ns->event; 785 p->event = ns->event;
752 res = 1; 786 res = 1;
753 } 787 }
754 spin_unlock(&vfsmount_lock); 788 br_read_unlock(vfsmount_lock);
755 789
756 return res; 790 return res;
757} 791}
@@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt)
952 int minimum_refs = 0; 986 int minimum_refs = 0;
953 struct vfsmount *p; 987 struct vfsmount *p;
954 988
955 spin_lock(&vfsmount_lock); 989 br_read_lock(vfsmount_lock);
956 for (p = mnt; p; p = next_mnt(p, mnt)) { 990 for (p = mnt; p; p = next_mnt(p, mnt)) {
957 actual_refs += atomic_read(&p->mnt_count); 991 actual_refs += atomic_read(&p->mnt_count);
958 minimum_refs += 2; 992 minimum_refs += 2;
959 } 993 }
960 spin_unlock(&vfsmount_lock); 994 br_read_unlock(vfsmount_lock);
961 995
962 if (actual_refs > minimum_refs) 996 if (actual_refs > minimum_refs)
963 return 0; 997 return 0;
@@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt)
984{ 1018{
985 int ret = 1; 1019 int ret = 1;
986 down_read(&namespace_sem); 1020 down_read(&namespace_sem);
987 spin_lock(&vfsmount_lock); 1021 br_read_lock(vfsmount_lock);
988 if (propagate_mount_busy(mnt, 2)) 1022 if (propagate_mount_busy(mnt, 2))
989 ret = 0; 1023 ret = 0;
990 spin_unlock(&vfsmount_lock); 1024 br_read_unlock(vfsmount_lock);
991 up_read(&namespace_sem); 1025 up_read(&namespace_sem);
992 return ret; 1026 return ret;
993} 1027}
@@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head)
1003 if (mnt->mnt_parent != mnt) { 1037 if (mnt->mnt_parent != mnt) {
1004 struct dentry *dentry; 1038 struct dentry *dentry;
1005 struct vfsmount *m; 1039 struct vfsmount *m;
1006 spin_lock(&vfsmount_lock); 1040
1041 br_write_lock(vfsmount_lock);
1007 dentry = mnt->mnt_mountpoint; 1042 dentry = mnt->mnt_mountpoint;
1008 m = mnt->mnt_parent; 1043 m = mnt->mnt_parent;
1009 mnt->mnt_mountpoint = mnt->mnt_root; 1044 mnt->mnt_mountpoint = mnt->mnt_root;
1010 mnt->mnt_parent = mnt; 1045 mnt->mnt_parent = mnt;
1011 m->mnt_ghosts--; 1046 m->mnt_ghosts--;
1012 spin_unlock(&vfsmount_lock); 1047 br_write_unlock(vfsmount_lock);
1013 dput(dentry); 1048 dput(dentry);
1014 mntput(m); 1049 mntput(m);
1015 } 1050 }
@@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head)
1017 } 1052 }
1018} 1053}
1019 1054
1055/*
1056 * vfsmount lock must be held for write
1057 * namespace_sem must be held for write
1058 */
1020void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) 1059void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1021{ 1060{
1022 struct vfsmount *p; 1061 struct vfsmount *p;
@@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1107 } 1146 }
1108 1147
1109 down_write(&namespace_sem); 1148 down_write(&namespace_sem);
1110 spin_lock(&vfsmount_lock); 1149 br_write_lock(vfsmount_lock);
1111 event++; 1150 event++;
1112 1151
1113 if (!(flags & MNT_DETACH)) 1152 if (!(flags & MNT_DETACH))
@@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1119 umount_tree(mnt, 1, &umount_list); 1158 umount_tree(mnt, 1, &umount_list);
1120 retval = 0; 1159 retval = 0;
1121 } 1160 }
1122 spin_unlock(&vfsmount_lock); 1161 br_write_unlock(vfsmount_lock);
1123 up_write(&namespace_sem); 1162 up_write(&namespace_sem);
1124 release_mounts(&umount_list); 1163 release_mounts(&umount_list);
1125 return retval; 1164 return retval;
@@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1231 q = clone_mnt(p, p->mnt_root, flag); 1270 q = clone_mnt(p, p->mnt_root, flag);
1232 if (!q) 1271 if (!q)
1233 goto Enomem; 1272 goto Enomem;
1234 spin_lock(&vfsmount_lock); 1273 br_write_lock(vfsmount_lock);
1235 list_add_tail(&q->mnt_list, &res->mnt_list); 1274 list_add_tail(&q->mnt_list, &res->mnt_list);
1236 attach_mnt(q, &path); 1275 attach_mnt(q, &path);
1237 spin_unlock(&vfsmount_lock); 1276 br_write_unlock(vfsmount_lock);
1238 } 1277 }
1239 } 1278 }
1240 return res; 1279 return res;
1241Enomem: 1280Enomem:
1242 if (res) { 1281 if (res) {
1243 LIST_HEAD(umount_list); 1282 LIST_HEAD(umount_list);
1244 spin_lock(&vfsmount_lock); 1283 br_write_lock(vfsmount_lock);
1245 umount_tree(res, 0, &umount_list); 1284 umount_tree(res, 0, &umount_list);
1246 spin_unlock(&vfsmount_lock); 1285 br_write_unlock(vfsmount_lock);
1247 release_mounts(&umount_list); 1286 release_mounts(&umount_list);
1248 } 1287 }
1249 return NULL; 1288 return NULL;
@@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
1262{ 1301{
1263 LIST_HEAD(umount_list); 1302 LIST_HEAD(umount_list);
1264 down_write(&namespace_sem); 1303 down_write(&namespace_sem);
1265 spin_lock(&vfsmount_lock); 1304 br_write_lock(vfsmount_lock);
1266 umount_tree(mnt, 0, &umount_list); 1305 umount_tree(mnt, 0, &umount_list);
1267 spin_unlock(&vfsmount_lock); 1306 br_write_unlock(vfsmount_lock);
1268 up_write(&namespace_sem); 1307 up_write(&namespace_sem);
1269 release_mounts(&umount_list); 1308 release_mounts(&umount_list);
1270} 1309}
@@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1392 if (err) 1431 if (err)
1393 goto out_cleanup_ids; 1432 goto out_cleanup_ids;
1394 1433
1395 spin_lock(&vfsmount_lock); 1434 br_write_lock(vfsmount_lock);
1396 1435
1397 if (IS_MNT_SHARED(dest_mnt)) { 1436 if (IS_MNT_SHARED(dest_mnt)) {
1398 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1437 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1411 list_del_init(&child->mnt_hash); 1450 list_del_init(&child->mnt_hash);
1412 commit_tree(child); 1451 commit_tree(child);
1413 } 1452 }
1414 spin_unlock(&vfsmount_lock); 1453 br_write_unlock(vfsmount_lock);
1454
1415 return 0; 1455 return 0;
1416 1456
1417 out_cleanup_ids: 1457 out_cleanup_ids:
@@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag)
1466 goto out_unlock; 1506 goto out_unlock;
1467 } 1507 }
1468 1508
1469 spin_lock(&vfsmount_lock); 1509 br_write_lock(vfsmount_lock);
1470 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) 1510 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
1471 change_mnt_propagation(m, type); 1511 change_mnt_propagation(m, type);
1472 spin_unlock(&vfsmount_lock); 1512 br_write_unlock(vfsmount_lock);
1473 1513
1474 out_unlock: 1514 out_unlock:
1475 up_write(&namespace_sem); 1515 up_write(&namespace_sem);
@@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name,
1513 err = graft_tree(mnt, path); 1553 err = graft_tree(mnt, path);
1514 if (err) { 1554 if (err) {
1515 LIST_HEAD(umount_list); 1555 LIST_HEAD(umount_list);
1516 spin_lock(&vfsmount_lock); 1556
1557 br_write_lock(vfsmount_lock);
1517 umount_tree(mnt, 0, &umount_list); 1558 umount_tree(mnt, 0, &umount_list);
1518 spin_unlock(&vfsmount_lock); 1559 br_write_unlock(vfsmount_lock);
1519 release_mounts(&umount_list); 1560 release_mounts(&umount_list);
1520 } 1561 }
1521 1562
@@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1568 else 1609 else
1569 err = do_remount_sb(sb, flags, data, 0); 1610 err = do_remount_sb(sb, flags, data, 0);
1570 if (!err) { 1611 if (!err) {
1571 spin_lock(&vfsmount_lock); 1612 br_write_lock(vfsmount_lock);
1572 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; 1613 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
1573 path->mnt->mnt_flags = mnt_flags; 1614 path->mnt->mnt_flags = mnt_flags;
1574 spin_unlock(&vfsmount_lock); 1615 br_write_unlock(vfsmount_lock);
1575 } 1616 }
1576 up_write(&sb->s_umount); 1617 up_write(&sb->s_umount);
1577 if (!err) { 1618 if (!err) {
1578 spin_lock(&vfsmount_lock); 1619 br_write_lock(vfsmount_lock);
1579 touch_mnt_namespace(path->mnt->mnt_ns); 1620 touch_mnt_namespace(path->mnt->mnt_ns);
1580 spin_unlock(&vfsmount_lock); 1621 br_write_unlock(vfsmount_lock);
1581 } 1622 }
1582 return err; 1623 return err;
1583} 1624}
@@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1754 return; 1795 return;
1755 1796
1756 down_write(&namespace_sem); 1797 down_write(&namespace_sem);
1757 spin_lock(&vfsmount_lock); 1798 br_write_lock(vfsmount_lock);
1758 1799
1759 /* extract from the expiration list every vfsmount that matches the 1800 /* extract from the expiration list every vfsmount that matches the
1760 * following criteria: 1801 * following criteria:
@@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
1773 touch_mnt_namespace(mnt->mnt_ns); 1814 touch_mnt_namespace(mnt->mnt_ns);
1774 umount_tree(mnt, 1, &umounts); 1815 umount_tree(mnt, 1, &umounts);
1775 } 1816 }
1776 spin_unlock(&vfsmount_lock); 1817 br_write_unlock(vfsmount_lock);
1777 up_write(&namespace_sem); 1818 up_write(&namespace_sem);
1778 1819
1779 release_mounts(&umounts); 1820 release_mounts(&umounts);
@@ -1830,6 +1871,8 @@ resume:
1830/* 1871/*
1831 * process a list of expirable mountpoints with the intent of discarding any 1872 * process a list of expirable mountpoints with the intent of discarding any
1832 * submounts of a specific parent mountpoint 1873 * submounts of a specific parent mountpoint
1874 *
1875 * vfsmount_lock must be held for write
1833 */ 1876 */
1834static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) 1877static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1835{ 1878{
@@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2048 kfree(new_ns); 2091 kfree(new_ns);
2049 return ERR_PTR(-ENOMEM); 2092 return ERR_PTR(-ENOMEM);
2050 } 2093 }
2051 spin_lock(&vfsmount_lock); 2094 br_write_lock(vfsmount_lock);
2052 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 2095 list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
2053 spin_unlock(&vfsmount_lock); 2096 br_write_unlock(vfsmount_lock);
2054 2097
2055 /* 2098 /*
2056 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts 2099 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2244 goto out2; /* not attached */ 2287 goto out2; /* not attached */
2245 /* make sure we can reach put_old from new_root */ 2288 /* make sure we can reach put_old from new_root */
2246 tmp = old.mnt; 2289 tmp = old.mnt;
2247 spin_lock(&vfsmount_lock); 2290 br_write_lock(vfsmount_lock);
2248 if (tmp != new.mnt) { 2291 if (tmp != new.mnt) {
2249 for (;;) { 2292 for (;;) {
2250 if (tmp->mnt_parent == tmp) 2293 if (tmp->mnt_parent == tmp)
@@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2264 /* mount new_root on / */ 2307 /* mount new_root on / */
2265 attach_mnt(new.mnt, &root_parent); 2308 attach_mnt(new.mnt, &root_parent);
2266 touch_mnt_namespace(current->nsproxy->mnt_ns); 2309 touch_mnt_namespace(current->nsproxy->mnt_ns);
2267 spin_unlock(&vfsmount_lock); 2310 br_write_unlock(vfsmount_lock);
2268 chroot_fs_refs(&root, &new); 2311 chroot_fs_refs(&root, &new);
2269 error = 0; 2312 error = 0;
2270 path_put(&root_parent); 2313 path_put(&root_parent);
@@ -2279,7 +2322,7 @@ out1:
2279out0: 2322out0:
2280 return error; 2323 return error;
2281out3: 2324out3:
2282 spin_unlock(&vfsmount_lock); 2325 br_write_unlock(vfsmount_lock);
2283 goto out2; 2326 goto out2;
2284} 2327}
2285 2328
@@ -2326,6 +2369,8 @@ void __init mnt_init(void)
2326 for (u = 0; u < HASH_SIZE; u++) 2369 for (u = 0; u < HASH_SIZE; u++)
2327 INIT_LIST_HEAD(&mount_hashtable[u]); 2370 INIT_LIST_HEAD(&mount_hashtable[u]);
2328 2371
2372 br_lock_init(vfsmount_lock);
2373
2329 err = sysfs_init(); 2374 err = sysfs_init();
2330 if (err) 2375 if (err)
2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2376 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns)
2344 if (!atomic_dec_and_test(&ns->count)) 2389 if (!atomic_dec_and_test(&ns->count))
2345 return; 2390 return;
2346 down_write(&namespace_sem); 2391 down_write(&namespace_sem);
2347 spin_lock(&vfsmount_lock); 2392 br_write_lock(vfsmount_lock);
2348 umount_tree(ns->root, 0, &umount_list); 2393 umount_tree(ns->root, 0, &umount_list);
2349 spin_unlock(&vfsmount_lock); 2394 br_write_unlock(vfsmount_lock);
2350 up_write(&namespace_sem); 2395 up_write(&namespace_sem);
2351 release_mounts(&umount_list); 2396 release_mounts(&umount_list);
2352 kfree(ns); 2397 kfree(ns);
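
Taken together, the fs/namespace.c hunks convert vfsmount_lock from a global spinlock to
a brlock: read-mostly paths such as lookup_mnt(), may_umount() and mnt_had_events() take
a cheap per-CPU read lock, while everything that changes the mount topology (attach,
umount_tree, remount flags, pivot_root) takes the write side, which locks out readers on
every CPU. A small sketch of the discipline, assuming vfsmount_lock is declared as a
brlock elsewhere in the file and initialised with br_lock_init() in mnt_init() as shown
above; peek_mount() and change_topology() are hypothetical wrappers:

/* Readers: per-CPU, cheap, may run concurrently on every CPU. */
static struct vfsmount *peek_mount(struct path *path)
{
        struct vfsmount *m;

        br_read_lock(vfsmount_lock);
        m = __lookup_mnt(path->mnt, path->dentry, 1);   /* hash walk only */
        if (m)
                mntget(m);
        br_read_unlock(vfsmount_lock);
        return m;
}

/* Writers: take every per-CPU lock; real callers also hold namespace_sem. */
static void change_topology(struct vfsmount *mnt, struct path *where)
{
        br_write_lock(vfsmount_lock);
        attach_mnt(mnt, where);         /* hash and child-list updates */
        br_write_unlock(vfsmount_lock);
}
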
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index bee60c04109a..922263393c76 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
175{ 175{
176 struct the_nilfs *nilfs = sbi->s_nilfs; 176 struct the_nilfs *nilfs = sbi->s_nilfs;
177 int err; 177 int err;
178 int barrier_done = 0;
179 178
180 if (nilfs_test_opt(sbi, BARRIER)) {
181 set_buffer_ordered(nilfs->ns_sbh[0]);
182 barrier_done = 1;
183 }
184 retry: 179 retry:
185 set_buffer_dirty(nilfs->ns_sbh[0]); 180 set_buffer_dirty(nilfs->ns_sbh[0]);
186 err = sync_dirty_buffer(nilfs->ns_sbh[0]); 181
187 if (err == -EOPNOTSUPP && barrier_done) { 182 if (nilfs_test_opt(sbi, BARRIER)) {
188 nilfs_warning(sbi->s_super, __func__, 183 err = __sync_dirty_buffer(nilfs->ns_sbh[0],
189 "barrier-based sync failed. " 184 WRITE_SYNC | WRITE_BARRIER);
190 "disabling barriers\n"); 185 if (err == -EOPNOTSUPP) {
191 nilfs_clear_opt(sbi, BARRIER); 186 nilfs_warning(sbi->s_super, __func__,
192 barrier_done = 0; 187 "barrier-based sync failed. "
193 clear_buffer_ordered(nilfs->ns_sbh[0]); 188 "disabling barriers\n");
194 goto retry; 189 nilfs_clear_opt(sbi, BARRIER);
190 goto retry;
191 }
192 } else {
193 err = sync_dirty_buffer(nilfs->ns_sbh[0]);
195 } 194 }
195
196 if (unlikely(err)) { 196 if (unlikely(err)) {
197 printk(KERN_ERR 197 printk(KERN_ERR
198 "NILFS: unable to write superblock (err=%d)\n", err); 198 "NILFS: unable to write superblock (err=%d)\n", err);
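
The nilfs_sync_super() rework stops flagging the superblock buffer with
set_buffer_ordered() and instead passes the barrier request straight to
__sync_dirty_buffer(); when the device reports -EOPNOTSUPP the BARRIER option is cleared
and the write is retried without it. A minimal generic sketch of that fallback, assuming
a caller-owned buffer and flag (write_super_bh() and use_barrier are hypothetical):

/* Illustrative barrier-with-fallback write; bh and use_barrier are caller-supplied. */
static int write_super_bh(struct buffer_head *bh, int *use_barrier)
{
        int err;

retry:
        set_buffer_dirty(bh);
        if (*use_barrier) {
                err = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
                if (err == -EOPNOTSUPP) {
                        /* device has no barrier support: degrade once and retry */
                        *use_barrier = 0;
                        goto retry;
                }
        } else {
                err = sync_dirty_buffer(bh);
        }
        return err;
}
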
diff --git a/fs/open.c b/fs/open.c
index 630715f9f73d..d74e1983e8dc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
675 f->f_path.mnt = mnt; 675 f->f_path.mnt = mnt;
676 f->f_pos = 0; 676 f->f_pos = 0;
677 f->f_op = fops_get(inode->i_fop); 677 f->f_op = fops_get(inode->i_fop);
678 file_move(f, &inode->i_sb->s_files); 678 file_sb_list_add(f, inode->i_sb);
679 679
680 error = security_dentry_open(f, cred); 680 error = security_dentry_open(f, cred);
681 if (error) 681 if (error)
@@ -721,7 +721,7 @@ cleanup_all:
721 mnt_drop_write(mnt); 721 mnt_drop_write(mnt);
722 } 722 }
723 } 723 }
724 file_kill(f); 724 file_sb_list_del(f);
725 f->f_path.dentry = NULL; 725 f->f_path.dentry = NULL;
726 f->f_path.mnt = NULL; 726 f->f_path.mnt = NULL;
727cleanup_file: 727cleanup_file:
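
file_move()/file_kill() used the single, globally locked files list;
file_sb_list_add()/file_sb_list_del() keep the file on its superblock's own list
instead. The pairing in __dentry_open() is the important part: the file goes on the list
once f_path is populated, and comes off again on the cleanup_all path before f_path is
cleared. A brief sketch of a caller keeping that ordering (open_one() and
setup_rest_of_file() are hypothetical stand-ins):

/* Sketch of the add/del pairing; not the real __dentry_open(). */
static struct file *open_one(struct file *f, struct inode *inode)
{
        int error;

        f->f_op = fops_get(inode->i_fop);
        file_sb_list_add(f, inode->i_sb);       /* now visible on the sb list */

        error = setup_rest_of_file(f);          /* stand-in for the real setup */
        if (error)
                goto cleanup;
        return f;

cleanup:
        file_sb_list_del(f);                    /* off the list before teardown */
        f->f_path.dentry = NULL;
        f->f_path.mnt = NULL;
        return ERR_PTR(error);
}
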
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a83149..8066b8dd748f 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt)
126 return 0; 126 return 0;
127} 127}
128 128
129/*
130 * vfsmount lock must be held for write
131 */
129void change_mnt_propagation(struct vfsmount *mnt, int type) 132void change_mnt_propagation(struct vfsmount *mnt, int type)
130{ 133{
131 if (type == MS_SHARED) { 134 if (type == MS_SHARED) {
@@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
270 prev_src_mnt = child; 273 prev_src_mnt = child;
271 } 274 }
272out: 275out:
273 spin_lock(&vfsmount_lock); 276 br_write_lock(vfsmount_lock);
274 while (!list_empty(&tmp_list)) { 277 while (!list_empty(&tmp_list)) {
275 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); 278 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash);
276 umount_tree(child, 0, &umount_list); 279 umount_tree(child, 0, &umount_list);
277 } 280 }
278 spin_unlock(&vfsmount_lock); 281 br_write_unlock(vfsmount_lock);
279 release_mounts(&umount_list); 282 release_mounts(&umount_list);
280 return ret; 283 return ret;
281} 284}
@@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
296 * other mounts its parent propagates to. 299 * other mounts its parent propagates to.
297 * Check if any of these mounts that **do not have submounts** 300 * Check if any of these mounts that **do not have submounts**
298 * have more references than 'refcnt'. If so return busy. 301 * have more references than 'refcnt'. If so return busy.
302 *
303 * vfsmount lock must be held for read or write
299 */ 304 */
300int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 305int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
301{ 306{
@@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt)
353 * collect all mounts that receive propagation from the mount in @list, 358 * collect all mounts that receive propagation from the mount in @list,
354 * and return these additional mounts in the same list. 359 * and return these additional mounts in the same list.
355 * @list: the list of mounts to be unmounted. 360 * @list: the list of mounts to be unmounted.
361 *
362 * vfsmount lock must be held for write
356 */ 363 */
357int propagate_umount(struct list_head *list) 364int propagate_umount(struct list_head *list)
358{ 365{
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ae35413dcbe1..caa758377d66 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode)
83 dquot_drop(inode); 83 dquot_drop(inode);
84 inode->i_blocks = 0; 84 inode->i_blocks = 0;
85 reiserfs_write_unlock_once(inode->i_sb, depth); 85 reiserfs_write_unlock_once(inode->i_sb, depth);
86 return;
86 87
87no_delete: 88no_delete:
88 end_writeback(inode); 89 end_writeback(inode);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1ec952b1f036..812e2c05aa29 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb,
2311 /* flush out the real blocks */ 2311 /* flush out the real blocks */
2312 for (i = 0; i < get_desc_trans_len(desc); i++) { 2312 for (i = 0; i < get_desc_trans_len(desc); i++) {
2313 set_buffer_dirty(real_blocks[i]); 2313 set_buffer_dirty(real_blocks[i]);
2314 ll_rw_block(SWRITE, 1, real_blocks + i); 2314 write_dirty_buffer(real_blocks[i], WRITE);
2315 } 2315 }
2316 for (i = 0; i < get_desc_trans_len(desc); i++) { 2316 for (i = 0; i < get_desc_trans_len(desc); i++) {
2317 wait_on_buffer(real_blocks[i]); 2317 wait_on_buffer(real_blocks[i]);
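
In journal_read_transaction() the buffer is already marked dirty on the previous line,
so the SWRITE request to ll_rw_block() is no longer needed; write_dirty_buffer() locks
the buffer and submits it if it is still dirty, and the following loop waits on each
buffer as before. A minimal sketch of the submit-then-wait pattern (flush_blocks() and
its arguments are hypothetical):

/* Sketch of the pattern used above: submit everything, then wait on everything. */
static void flush_blocks(struct buffer_head **bhs, int nr)
{
        int i;

        for (i = 0; i < nr; i++) {
                set_buffer_dirty(bhs[i]);
                write_dirty_buffer(bhs[i], WRITE);      /* was ll_rw_block(SWRITE, ...) */
        }
        for (i = 0; i < nr; i++)
                wait_on_buffer(bhs[i]);                 /* wait only after all submissions */
}
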
diff --git a/fs/super.c b/fs/super.c
index 9674ab2c8718..8819e3a7ff20 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
54 s = NULL; 54 s = NULL;
55 goto out; 55 goto out;
56 } 56 }
57#ifdef CONFIG_SMP
58 s->s_files = alloc_percpu(struct list_head);
59 if (!s->s_files) {
60 security_sb_free(s);
61 kfree(s);
62 s = NULL;
63 goto out;
64 } else {
65 int i;
66
67 for_each_possible_cpu(i)
68 INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
69 }
70#else
57 INIT_LIST_HEAD(&s->s_files); 71 INIT_LIST_HEAD(&s->s_files);
72#endif
58 INIT_LIST_HEAD(&s->s_instances); 73 INIT_LIST_HEAD(&s->s_instances);
59 INIT_HLIST_HEAD(&s->s_anon); 74 INIT_HLIST_HEAD(&s->s_anon);
60 INIT_LIST_HEAD(&s->s_inodes); 75 INIT_LIST_HEAD(&s->s_inodes);
@@ -108,6 +123,9 @@ out:
108 */ 123 */
109static inline void destroy_super(struct super_block *s) 124static inline void destroy_super(struct super_block *s)
110{ 125{
126#ifdef CONFIG_SMP
127 free_percpu(s->s_files);
128#endif
111 security_sb_free(s); 129 security_sb_free(s);
112 kfree(s->s_subtype); 130 kfree(s->s_subtype);
113 kfree(s->s_options); 131 kfree(s->s_options);
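
On CONFIG_SMP kernels s_files becomes a per-CPU array of list heads, so
file_sb_list_add() can stay CPU-local and the file list stops being a single point of
contention; anything that must visit every open file of a superblock then has to walk
all of the per-CPU lists. A hedged sketch of such a walk (walk_sb_files() is
hypothetical, locking is omitted, the real iteration helpers live in fs/file_table.c and
are not shown here, and the f_u.fu_list linkage field is assumed):

/* Illustrative only: visit every file on a super_block's per-CPU lists. */
static void walk_sb_files(struct super_block *sb, void (*fn)(struct file *))
{
#ifdef CONFIG_SMP
        int cpu;

        for_each_possible_cpu(cpu) {
                struct list_head *head = per_cpu_ptr(sb->s_files, cpu);
                struct file *f;

                list_for_each_entry(f, head, f_u.fu_list)
                        fn(f);
        }
#else
        struct file *f;

        list_for_each_entry(f, &sb->s_files, f_u.fu_list)
                fn(f);
#endif
}
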
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 048484fb10d2..46f7a807bbc1 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
114 114
115 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 115 ubh_mark_buffer_dirty (USPI_UBH(uspi));
116 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 116 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
117 if (sb->s_flags & MS_SYNCHRONOUS) { 117 if (sb->s_flags & MS_SYNCHRONOUS)
118 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 118 ubh_sync_block(UCPI_UBH(ucpi));
119 ubh_wait_on_buffer (UCPI_UBH(ucpi));
120 }
121 sb->s_dirt = 1; 119 sb->s_dirt = 1;
122 120
123 unlock_super (sb); 121 unlock_super (sb);
@@ -207,10 +205,8 @@ do_more:
207 205
208 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 206 ubh_mark_buffer_dirty (USPI_UBH(uspi));
209 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 207 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
210 if (sb->s_flags & MS_SYNCHRONOUS) { 208 if (sb->s_flags & MS_SYNCHRONOUS)
211 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 209 ubh_sync_block(UCPI_UBH(ucpi));
212 ubh_wait_on_buffer (UCPI_UBH(ucpi));
213 }
214 210
215 if (overflow) { 211 if (overflow) {
216 fragment += count; 212 fragment += count;
@@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
558 554
559 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 555 ubh_mark_buffer_dirty (USPI_UBH(uspi));
560 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 556 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
561 if (sb->s_flags & MS_SYNCHRONOUS) { 557 if (sb->s_flags & MS_SYNCHRONOUS)
562 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 558 ubh_sync_block(UCPI_UBH(ucpi));
563 ubh_wait_on_buffer (UCPI_UBH(ucpi));
564 }
565 sb->s_dirt = 1; 559 sb->s_dirt = 1;
566 560
567 UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); 561 UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
@@ -680,10 +674,8 @@ cg_found:
680succed: 674succed:
681 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 675 ubh_mark_buffer_dirty (USPI_UBH(uspi));
682 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 676 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
683 if (sb->s_flags & MS_SYNCHRONOUS) { 677 if (sb->s_flags & MS_SYNCHRONOUS)
684 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 678 ubh_sync_block(UCPI_UBH(ucpi));
685 ubh_wait_on_buffer (UCPI_UBH(ucpi));
686 }
687 sb->s_dirt = 1; 679 sb->s_dirt = 1;
688 680
689 result += cgno * uspi->s_fpg; 681 result += cgno * uspi->s_fpg;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 428017e018fe..2eabf04af3de 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode)
113 113
114 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 114 ubh_mark_buffer_dirty (USPI_UBH(uspi));
115 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 115 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
116 if (sb->s_flags & MS_SYNCHRONOUS) { 116 if (sb->s_flags & MS_SYNCHRONOUS)
117 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 117 ubh_sync_block(UCPI_UBH(ucpi));
118 ubh_wait_on_buffer (UCPI_UBH(ucpi));
119 }
120 118
121 sb->s_dirt = 1; 119 sb->s_dirt = 1;
122 unlock_super (sb); 120 unlock_super (sb);
@@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
156 154
157 fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); 155 fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
158 ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); 156 ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
159 if (sb->s_flags & MS_SYNCHRONOUS) { 157 if (sb->s_flags & MS_SYNCHRONOUS)
160 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 158 ubh_sync_block(UCPI_UBH(ucpi));
161 ubh_wait_on_buffer(UCPI_UBH(ucpi));
162 }
163 159
164 UFSD("EXIT\n"); 160 UFSD("EXIT\n");
165} 161}
@@ -290,10 +286,8 @@ cg_found:
290 } 286 }
291 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 287 ubh_mark_buffer_dirty (USPI_UBH(uspi));
292 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 288 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
293 if (sb->s_flags & MS_SYNCHRONOUS) { 289 if (sb->s_flags & MS_SYNCHRONOUS)
294 ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); 290 ubh_sync_block(UCPI_UBH(ucpi));
295 ubh_wait_on_buffer (UCPI_UBH(ucpi));
296 }
297 sb->s_dirt = 1; 291 sb->s_dirt = 1;
298 292
299 inode->i_ino = cg * uspi->s_ipg + bit; 293 inode->i_ino = cg * uspi->s_ipg + bit;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 34d5cb135320..a58f9155fc9a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
243 ubh_bforget(ind_ubh); 243 ubh_bforget(ind_ubh);
244 ind_ubh = NULL; 244 ind_ubh = NULL;
245 } 245 }
246 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { 246 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh))
247 ubh_ll_rw_block(SWRITE, ind_ubh); 247 ubh_sync_block(ind_ubh);
248 ubh_wait_on_buffer (ind_ubh);
249 }
250 ubh_brelse (ind_ubh); 248 ubh_brelse (ind_ubh);
251 249
252 UFSD("EXIT: ino %lu\n", inode->i_ino); 250 UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
307 ubh_bforget(dind_bh); 305 ubh_bforget(dind_bh);
308 dind_bh = NULL; 306 dind_bh = NULL;
309 } 307 }
310 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { 308 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh))
311 ubh_ll_rw_block(SWRITE, dind_bh); 309 ubh_sync_block(dind_bh);
312 ubh_wait_on_buffer (dind_bh);
313 }
314 ubh_brelse (dind_bh); 310 ubh_brelse (dind_bh);
315 311
316 UFSD("EXIT: ino %lu\n", inode->i_ino); 312 UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode)
367 ubh_bforget(tind_bh); 363 ubh_bforget(tind_bh);
368 tind_bh = NULL; 364 tind_bh = NULL;
369 } 365 }
370 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { 366 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh))
371 ubh_ll_rw_block(SWRITE, tind_bh); 367 ubh_sync_block(tind_bh);
372 ubh_wait_on_buffer (tind_bh);
373 }
374 ubh_brelse (tind_bh); 368 ubh_brelse (tind_bh);
375 369
376 UFSD("EXIT: ino %lu\n", inode->i_ino); 370 UFSD("EXIT: ino %lu\n", inode->i_ino);
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 85a7fc9e4a4e..d2c36d53fe66 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag)
113 } 113 }
114} 114}
115 115
116void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) 116void ubh_sync_block(struct ufs_buffer_head *ubh)
117{ 117{
118 if (!ubh) 118 if (ubh) {
119 return; 119 unsigned i;
120 120
121 ll_rw_block(rw, ubh->count, ubh->bh); 121 for (i = 0; i < ubh->count; i++)
122} 122 write_dirty_buffer(ubh->bh[i], WRITE);
123 123
124void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) 124 for (i = 0; i < ubh->count; i++)
125{ 125 wait_on_buffer(ubh->bh[i]);
126 unsigned i; 126 }
127 if (!ubh)
128 return;
129 for ( i = 0; i < ubh->count; i++ )
130 wait_on_buffer (ubh->bh[i]);
131} 127}
132 128
133void ubh_bforget (struct ufs_buffer_head * ubh) 129void ubh_bforget (struct ufs_buffer_head * ubh)
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 0466036912f1..9f8775ce381c 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *);
269extern void ubh_brelse_uspi (struct ufs_sb_private_info *); 269extern void ubh_brelse_uspi (struct ufs_sb_private_info *);
270extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); 270extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *);
271extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); 271extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int);
272extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); 272extern void ubh_sync_block(struct ufs_buffer_head *);
273extern void ubh_wait_on_buffer (struct ufs_buffer_head *);
274extern void ubh_bforget (struct ufs_buffer_head *); 273extern void ubh_bforget (struct ufs_buffer_head *);
275extern int ubh_buffer_dirty (struct ufs_buffer_head *); 274extern int ubh_buffer_dirty (struct ufs_buffer_head *);
276#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) 275#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size)
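
All of the ufs call sites above replace the two-step "ubh_ll_rw_block(SWRITE, ...) then
ubh_wait_on_buffer(...)" sequence with a single ubh_sync_block(), which, per the
fs/ufs/util.c hunk, writes every dirty buffer in the ufs_buffer_head with
write_dirty_buffer() and only then waits on each one. A small caller-side sketch for a
synchronous mount (dirty_and_maybe_sync() is a hypothetical wrapper around the pattern
repeated in balloc.c and ialloc.c):

/* Sketch of the caller-side pattern after this patch. */
static void dirty_and_maybe_sync(struct super_block *sb, struct ufs_buffer_head *ubh)
{
        ubh_mark_buffer_dirty(ubh);
        if (sb->s_flags & MS_SYNCHRONOUS)
                ubh_sync_block(ubh);    /* submit all buffers, then wait on each */
        sb->s_dirt = 1;
}
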