author    Linus Torvalds <torvalds@linux-foundation.org>    2015-09-05 23:34:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-09-05 23:34:28 -0400
commit    7d9071a095023cd1db8fa18fa0d648dc1a5210e0
tree      072b462e43912b9dfc321136f3367114dcb8f2b3
parent    bd779669945ed9982890da789ad32e3bd0d41f14
parent    397d425dc26da728396e66d392d5dcb8dac30c37
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro:
 "In this one:

   - d_move fixes (Eric Biederman)

   - UFS fixes (me; locking is mostly sane now, a bunch of bugs in
     error handling ought to be fixed)

   - switch of sb_writers to percpu rwsem (Oleg Nesterov)

   - superblock scalability (Josef Bacik and Dave Chinner)

   - swapon(2) race fix (Hugh Dickins)"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (65 commits)
  vfs: Test for and handle paths that are unreachable from their mnt_root
  dcache: Reduce the scope of i_lock in d_splice_alias
  dcache: Handle escaped paths in prepend_path
  mm: fix potential data race in SyS_swapon
  inode: don't softlockup when evicting inodes
  inode: rename i_wb_list to i_io_list
  sync: serialise per-superblock sync operations
  inode: convert inode_sb_list_lock to per-sb
  inode: add hlist_fake to avoid the inode hash lock in evict
  writeback: plug writeback at a high level
  change sb_writers to use percpu_rw_semaphore
  shift percpu_counter_destroy() into destroy_super_work()
  percpu-rwsem: kill CONFIG_PERCPU_RWSEM
  percpu-rwsem: introduce percpu_rwsem_release() and percpu_rwsem_acquire()
  percpu-rwsem: introduce percpu_down_read_trylock()
  document rwsem_release() in sb_wait_write()
  fix the broken lockdep logic in __sb_start_write()
  introduce __sb_writers_{acquired,release}() helpers
  ufs_inode_get{frag,block}(): get rid of 'phys' argument
  ufs_getfrag_block(): tidy up a bit
  ...
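The headline change in this pull is the sb_writers rework: three percpu_rw_semaphores, one per freeze level, replace the old per-level counter, waitqueue and hand-rolled lockdep map. Writers take their level for read; freeze_super() takes every level for write, which both blocks new writers and waits for in-flight ones. A rough, runnable userspace analogy of that layering, with plain pthread rwlocks standing in for percpu rwsems (all names here are invented for illustration, not the kernel API):

	#include <pthread.h>
	#include <stdio.h>

	/* Three nesting freeze levels, mirroring SB_FREEZE_WRITE,
	 * SB_FREEZE_PAGEFAULT and SB_FREEZE_FS. */
	enum { FREEZE_WRITE, FREEZE_PAGEFAULT, FREEZE_FS, FREEZE_LEVELS };

	static pthread_rwlock_t frozen[FREEZE_LEVELS] = {
		PTHREAD_RWLOCK_INITIALIZER,
		PTHREAD_RWLOCK_INITIALIZER,
		PTHREAD_RWLOCK_INITIALIZER,
	};

	/* A writer "starts a write" by read-locking its level; many writers
	 * run concurrently, like percpu_down_read() in __sb_start_write(). */
	static void start_write(int level) { pthread_rwlock_rdlock(&frozen[level]); }
	static void end_write(int level)   { pthread_rwlock_unlock(&frozen[level]); }

	/* Freezing write-locks every level in order, so it blocks new
	 * writers and waits for the ones already in flight, like
	 * sb_wait_write() built on percpu_down_write(). */
	static void freeze(void)
	{
		for (int i = 0; i < FREEZE_LEVELS; i++)
			pthread_rwlock_wrlock(&frozen[i]);
	}

	static void thaw(void)
	{
		for (int i = FREEZE_LEVELS - 1; i >= 0; i--)
			pthread_rwlock_unlock(&frozen[i]);
	}

	int main(void)
	{
		start_write(FREEZE_WRITE);
		printf("write in progress\n");
		end_write(FREEZE_WRITE);

		freeze();
		printf("frozen: no writers can enter\n");
		thaw();
		return 0;
	}

The kernel version is per-CPU so the writer-side (freeze) pays the synchronisation cost instead of the hot write path; the locking discipline is the same.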
-rw-r--r--  arch/Kconfig                          1
-rw-r--r--  fs/block_dev.c                       12
-rw-r--r--  fs/btrfs/transaction.c                8
-rw-r--r--  fs/dcache.c                          14
-rw-r--r--  fs/drop_caches.c                     10
-rw-r--r--  fs/fs-writeback.c                    72
-rw-r--r--  fs/inode.c                           50
-rw-r--r--  fs/internal.h                         3
-rw-r--r--  fs/namei.c                           27
-rw-r--r--  fs/notify/inode_mark.c               20
-rw-r--r--  fs/quota/dquot.c                     16
-rw-r--r--  fs/super.c                          175
-rw-r--r--  fs/ufs/Makefile                       2
-rw-r--r--  fs/ufs/balloc.c                       4
-rw-r--r--  fs/ufs/inode.c                      948
-rw-r--r--  fs/ufs/super.c                       36
-rw-r--r--  fs/ufs/truncate.c                   523
-rw-r--r--  fs/ufs/ufs.h                         13
-rw-r--r--  fs/xfs/xfs_aops.c                     6
-rw-r--r--  include/linux/fs.h                   33
-rw-r--r--  include/linux/fsnotify_backend.h      4
-rw-r--r--  include/linux/list.h                  5
-rw-r--r--  include/linux/percpu-rwsem.h         20
-rw-r--r--  init/Kconfig                          1
-rw-r--r--  kernel/locking/Makefile               3
-rw-r--r--  kernel/locking/percpu-rwsem.c        13
-rw-r--r--  lib/Kconfig                           3
-rw-r--r--  mm/backing-dev.c                      8
-rw-r--r--  mm/swapfile.c                        25
29 files changed, 936 insertions(+), 1119 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index a71cdbe2a04d..8f3564930580 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -93,7 +93,6 @@ config KPROBES_ON_FTRACE
 
 config UPROBES
 	def_bool n
-	select PERCPU_RWSEM
 	help
 	  Uprobes is the user-space counterpart to kprobes: they
 	  enable instrumentation applications (such as 'perf probe')
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 198243717da5..33b813e04f79 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1769,7 +1769,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 {
 	struct inode *inode, *old_inode = NULL;
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&blockdev_superblock->s_inode_list_lock);
 	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
 		struct address_space *mapping = inode->i_mapping;
 
@@ -1781,13 +1781,13 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&blockdev_superblock->s_inode_list_lock);
 		/*
 		 * We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the
-		 * inode_sb_list_lock. We cannot iput the inode now as we can
+		 * s_inode_list_lock. We cannot iput the inode now as we can
 		 * be holding the last reference and we cannot iput it under
-		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * s_inode_list_lock. So we keep the reference and iput it
 		 * later.
 		 */
 		iput(old_inode);
@@ -1795,8 +1795,8 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 
 		func(I_BDEV(inode), arg);
 
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&blockdev_superblock->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&blockdev_superblock->s_inode_list_lock);
 	iput(old_inode);
 }
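The pattern above recurs throughout this series (drop_caches, dquot, wait_sb_inodes): pin the current inode with __iget() under the list lock, drop the lock to do blocking work, and defer iput() of the previous inode until the lock is released, since the final reference may not be dropped under a spinlock. A self-contained userspace sketch of the same discipline, with a mutex and a plain refcount standing in for s_inode_list_lock and i_count (names illustrative; refcounting would need to be atomic in a truly concurrent build):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Invariant borrowed from the VFS: a node with refcount > 0 is
	 * never unlinked from the list, so it can resume iteration. */
	struct node {
		int refcount;           /* stands in for inode->i_count */
		struct node *next;
		int id;
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct node *head;

	static void put(struct node *n)
	{
		if (n && --n->refcount == 0)
			free(n);        /* the potentially-blocking "final iput" */
	}

	static void walk(void (*func)(struct node *))
	{
		struct node *n, *old = NULL;

		pthread_mutex_lock(&list_lock);
		for (n = head; n; n = n->next) {
			n->refcount++;                  /* __iget() under the lock */
			pthread_mutex_unlock(&list_lock);

			put(old);                       /* deferred iput(old_inode) */
			old = n;
			func(n);                        /* may block; lock not held */

			pthread_mutex_lock(&list_lock); /* n is pinned: n->next is safe */
		}
		pthread_mutex_unlock(&list_lock);
		put(old);                               /* drop the last reference */
	}

	static void show(struct node *n) { printf("visiting %d\n", n->id); }

	int main(void)
	{
		for (int i = 3; i > 0; i--) {
			struct node *n = calloc(1, sizeof(*n));
			n->id = i;
			n->refcount = 1;        /* the list's own reference */
			n->next = head;
			head = n;
		}
		walk(show);
		return 0;
	}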
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 68ad89e23713..8f259b3a66b3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1640,9 +1640,7 @@ static void do_async_commit(struct work_struct *work)
 	 * Tell lockdep about it.
 	 */
 	if (ac->newtrans->type & __TRANS_FREEZABLE)
-		rwsem_acquire_read(
-		     &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-		     0, 1, _THIS_IP_);
+		__sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);
 
 	current->journal_info = ac->newtrans;
 
@@ -1681,9 +1679,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 	 * async commit thread will be the one to unlock it.
 	 */
 	if (ac->newtrans->type & __TRANS_FREEZABLE)
-		rwsem_release(
-			&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-			1, _THIS_IP_);
+		__sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);
 
 	schedule_work(&ac->work);
 
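btrfs hands freeze protection from the task that queues an async commit to the worker that finishes it, so lockdep must be told the lock changed owners without the semaphore itself being touched. The definitions of the new helpers are not among the hunks shown in this pull; given the percpu-rwsem primitives introduced alongside them, they would plausibly reduce to something like the sketch below (an assumption about their shape, not verbatim kernel code; the trailing 1 marks a reader to lockdep):

	/* Sketch only: assumed shape of the __sb_writers_* helpers.
	 * percpu_rwsem_release()/percpu_rwsem_acquire() update lockdep's
	 * ownership bookkeeping without touching the rwsem, which is what
	 * is needed when a held lock migrates between tasks. */
	static inline void __sb_writers_release(struct super_block *sb, int level)
	{
		percpu_rwsem_release(&sb->s_writers.rw_sem[level-1], 1, _THIS_IP_);
	}

	static inline void __sb_writers_acquired(struct super_block *sb, int level)
	{
		percpu_rwsem_acquire(&sb->s_writers.rw_sem[level-1], 1, _THIS_IP_);
	}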
diff --git a/fs/dcache.c b/fs/dcache.c
index 9b5fe503f6cb..5c33aeb0f68f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2718,7 +2718,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
+ * dentry->d_parent->d_inode->i_mutex, and rename_lock
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
@@ -2744,7 +2744,6 @@ out_unalias:
 	__d_move(alias, dentry, false);
 	ret = 0;
 out_err:
-	spin_unlock(&inode->i_lock);
 	if (m2)
 		mutex_unlock(m2);
 	if (m1)
@@ -2790,10 +2789,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 	if (S_ISDIR(inode->i_mode)) {
 		struct dentry *new = __d_find_any_alias(inode);
 		if (unlikely(new)) {
+			/* The reference to new ensures it remains an alias */
+			spin_unlock(&inode->i_lock);
 			write_seqlock(&rename_lock);
 			if (unlikely(d_ancestor(new, dentry))) {
 				write_sequnlock(&rename_lock);
-				spin_unlock(&inode->i_lock);
 				dput(new);
 				new = ERR_PTR(-ELOOP);
 				pr_warn_ratelimited(
@@ -2812,7 +2812,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 		} else {
 			__d_move(new, dentry, false);
 			write_sequnlock(&rename_lock);
-			spin_unlock(&inode->i_lock);
 			security_d_instantiate(new, inode);
 		}
 		iput(inode);
@@ -2926,6 +2925,13 @@ restart:
 
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
 			struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
+			/* Escaped? */
+			if (dentry != vfsmnt->mnt_root) {
+				bptr = *buffer;
+				blen = *buflen;
+				error = 3;
+				break;
+			}
 			/* Global root? */
 			if (mnt != parent) {
 				dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
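The prepend_path "Escaped?" check (and path_connected() in fs/namei.c below) handles dentries that rename() has moved out from under a bind mount, leaving them unreachable from the mount's mnt_root. The situation is easy to provoke from userspace; a hedged sketch of a reproducer (requires root, run in a scratch directory; paths are made up for illustration and cleanup is omitted):

	#include <stdio.h>
	#include <sys/mount.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];

		mkdir("src", 0755);
		mkdir("src/d", 0755);
		mkdir("bind", 0755);

		/* bind-mount src on bind: bind/d and src/d share a dentry */
		if (mount("src", "bind", NULL, MS_BIND, NULL) != 0) {
			perror("mount (need root)");
			return 1;
		}

		chdir("bind/d");                  /* cwd is inside the bind mount */
		rename("../../src/d", "../../d"); /* the dentry escapes the mount */

		/* Before these fixes the kernel could report a path here that
		 * does not exist under the mount, and ".." walks could escape
		 * mnt_root; with them, the dentry is detected as unreachable. */
		if (getcwd(buf, sizeof(buf)))
			printf("cwd: %s\n", buf);
		else
			perror("getcwd");
		return 0;
	}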
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 5718cb9f7273..d72d52b90433 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -17,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -27,13 +27,15 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
+
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
-		spin_lock(&inode_sb_list_lock);
+
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 	iput(toput_inode);
 }
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5fa588e933d5..ae0f438c2ee6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -88,7 +88,7 @@ unsigned int dirtytime_expire_interval = 12 * 60 * 60;
 
 static inline struct inode *wb_inode(struct list_head *head)
 {
-	return list_entry(head, struct inode, i_wb_list);
+	return list_entry(head, struct inode, i_io_list);
 }
 
 /*
@@ -125,22 +125,22 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb)
 }
 
 /**
- * inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list
+ * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
  * @inode: inode to be moved
  * @wb: target bdi_writeback
  * @head: one of @wb->b_{dirty|io|more_io}
  *
- * Move @inode->i_wb_list to @list of @wb and set %WB_has_dirty_io.
+ * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
  * Returns %true if @inode is the first occupant of the !dirty_time IO
  * lists; otherwise, %false.
  */
-static bool inode_wb_list_move_locked(struct inode *inode,
+static bool inode_io_list_move_locked(struct inode *inode,
 				      struct bdi_writeback *wb,
 				      struct list_head *head)
 {
 	assert_spin_locked(&wb->list_lock);
 
-	list_move(&inode->i_wb_list, head);
+	list_move(&inode->i_io_list, head);
 
 	/* dirty_time doesn't count as dirty_io until expiration */
 	if (head != &wb->b_dirty_time)
@@ -151,19 +151,19 @@ static bool inode_wb_list_move_locked(struct inode *inode,
 }
 
 /**
- * inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list
+ * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
  * @inode: inode to be removed
  * @wb: bdi_writeback @inode is being removed from
  *
  * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all are empty afterwards.
 */
-static void inode_wb_list_del_locked(struct inode *inode,
+static void inode_io_list_del_locked(struct inode *inode,
 				     struct bdi_writeback *wb)
 {
 	assert_spin_locked(&wb->list_lock);
 
-	list_del_init(&inode->i_wb_list);
+	list_del_init(&inode->i_io_list);
 	wb_io_lists_depopulated(wb);
 }
 
@@ -351,7 +351,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 
 	/*
 	 * Once I_FREEING is visible under i_lock, the eviction path owns
-	 * the inode and we shouldn't modify ->i_wb_list.
+	 * the inode and we shouldn't modify ->i_io_list.
 	 */
 	if (unlikely(inode->i_state & I_FREEING))
 		goto skip_switch;
@@ -390,16 +390,16 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	 * is always correct including from ->b_dirty_time. The transfer
 	 * preserves @inode->dirtied_when ordering.
 	 */
-	if (!list_empty(&inode->i_wb_list)) {
+	if (!list_empty(&inode->i_io_list)) {
 		struct inode *pos;
 
-		inode_wb_list_del_locked(inode, old_wb);
+		inode_io_list_del_locked(inode, old_wb);
 		inode->i_wb = new_wb;
-		list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list)
+		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
 			if (time_after_eq(inode->dirtied_when,
 					  pos->dirtied_when))
 				break;
-		inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev);
+		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
 	} else {
 		inode->i_wb = new_wb;
 	}
@@ -961,12 +961,12 @@ void wb_start_background_writeback(struct bdi_writeback *wb)
 /*
  * Remove the inode from the writeback list it is on.
  */
-void inode_wb_list_del(struct inode *inode)
+void inode_io_list_del(struct inode *inode)
 {
 	struct bdi_writeback *wb;
 
 	wb = inode_to_wb_and_lock_list(inode);
-	inode_wb_list_del_locked(inode, wb);
+	inode_io_list_del_locked(inode, wb);
 	spin_unlock(&wb->list_lock);
 }
 
@@ -988,7 +988,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
-	inode_wb_list_move_locked(inode, wb, &wb->b_dirty);
+	inode_io_list_move_locked(inode, wb, &wb->b_dirty);
 }
 
 /*
@@ -996,7 +996,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 */
 static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 {
-	inode_wb_list_move_locked(inode, wb, &wb->b_more_io);
+	inode_io_list_move_locked(inode, wb, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -1055,7 +1055,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 		if (older_than_this &&
 		    inode_dirtied_after(inode, *older_than_this))
 			break;
-		list_move(&inode->i_wb_list, &tmp);
+		list_move(&inode->i_io_list, &tmp);
 		moved++;
 		if (flags & EXPIRE_DIRTY_ATIME)
 			set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
@@ -1078,7 +1078,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 		list_for_each_prev_safe(pos, node, &tmp) {
 			inode = wb_inode(pos);
 			if (inode->i_sb == sb)
-				list_move(&inode->i_wb_list, dispatch_queue);
+				list_move(&inode->i_io_list, dispatch_queue);
 		}
 	}
 out:
@@ -1232,10 +1232,10 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		redirty_tail(inode, wb);
 	} else if (inode->i_state & I_DIRTY_TIME) {
 		inode->dirtied_when = jiffies;
-		inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time);
+		inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
 	} else {
 		/* The inode is clean. Remove from writeback lists. */
-		inode_wb_list_del_locked(inode, wb);
+		inode_io_list_del_locked(inode, wb);
 	}
 }
 
@@ -1378,7 +1378,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	 * touch it. See comment above for explanation.
 	 */
 	if (!(inode->i_state & I_DIRTY_ALL))
-		inode_wb_list_del_locked(inode, wb);
+		inode_io_list_del_locked(inode, wb);
 	spin_unlock(&wb->list_lock);
 	inode_sync_complete(inode);
 out:
@@ -1439,7 +1439,9 @@ static long writeback_sb_inodes(struct super_block *sb,
 	unsigned long start_time = jiffies;
 	long write_chunk;
 	long wrote = 0;  /* count both pages and inodes */
+	struct blk_plug plug;
 
+	blk_start_plug(&plug);
 	while (!list_empty(&wb->b_io)) {
 		struct inode *inode = wb_inode(wb->b_io.prev);
 
@@ -1537,6 +1539,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 			break;
 		}
 	}
+	blk_finish_plug(&plug);
 	return wrote;
 }
 
@@ -2088,7 +2091,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		else
 			dirty_list = &wb->b_dirty_time;
 
-		wakeup_bdi = inode_wb_list_move_locked(inode, wb,
+		wakeup_bdi = inode_io_list_move_locked(inode, wb,
 						       dirty_list);
 
 		spin_unlock(&wb->list_lock);
@@ -2111,6 +2114,15 @@ out_unlock_inode:
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
+/*
+ * The @s_sync_lock is used to serialise concurrent sync operations
+ * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
+ * Concurrent callers will block on the s_sync_lock rather than doing
+ * contending walks. The queueing maintains sync(2) required behaviour as all
+ * the IO that has been issued up to the time this function is entered is
+ * guaranteed to be completed by the time we have gained the lock and waited
+ * for all IO that is in progress regardless of the order callers are granted
+ * the lock.
+ */
 static void wait_sb_inodes(struct super_block *sb)
 {
 	struct inode *inode, *old_inode = NULL;
@@ -2121,7 +2133,8 @@ static void wait_sb_inodes(struct super_block *sb)
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_sb_list_lock);
+	mutex_lock(&sb->s_sync_lock);
+	spin_lock(&sb->s_inode_list_lock);
 
 	/*
 	 * Data integrity sync. Must wait for all pages under writeback,
@@ -2141,14 +2154,14 @@ static void wait_sb_inodes(struct super_block *sb)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
 
 		/*
 		 * We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the
-		 * inode_sb_list_lock. We cannot iput the inode now as we can
+		 * s_inode_list_lock. We cannot iput the inode now as we can
 		 * be holding the last reference and we cannot iput it under
-		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * s_inode_list_lock. So we keep the reference and iput it
 		 * later.
 		 */
 		iput(old_inode);
@@ -2158,10 +2171,11 @@ static void wait_sb_inodes(struct super_block *sb)
 
 		cond_resched();
 
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 	iput(old_inode);
+	mutex_unlock(&sb->s_sync_lock);
 }
 
 static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
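The plug added around the writeback_sb_inodes() loop above batches the per-inode submissions: block-layer I/O issued between blk_start_plug() and blk_finish_plug() is queued per-task and dispatched as one burst (or when the task sleeps), giving the elevator a chance to merge adjacent requests. The usage is the canonical one; a minimal kernel-style sketch of the pattern (the two loop helpers are invented for illustration, not real APIs):

	/* Kernel context, sketch only: everything submitted while the plug
	 * is active is held in a per-task list and flushed together at
	 * blk_finish_plug(). */
	struct blk_plug plug;

	blk_start_plug(&plug);
	for_each_dirty_inode(inode)          /* illustrative iterator */
		write_inode_pages(inode);    /* would otherwise dispatch alone */
	blk_finish_plug(&plug);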
diff --git a/fs/inode.c b/fs/inode.c
index d30640f7a193..78a17b8859e1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -28,16 +28,16 @@
  *   inode->i_state, inode->i_hash, __iget()
  * Inode LRU list locks protect:
  *   inode->i_sb->s_inode_lru, inode->i_lru
- * inode_sb_list_lock protects:
- *   sb->s_inodes, inode->i_sb_list
+ * inode->i_sb->s_inode_list_lock protects:
+ *   inode->i_sb->s_inodes, inode->i_sb_list
  * bdi->wb.list_lock protects:
- *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
+ *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
  * inode_hash_lock protects:
  *   inode_hashtable, inode->i_hash
  *
  * Lock ordering:
  *
- * inode_sb_list_lock
+ * inode->i_sb->s_inode_list_lock
  *   inode->i_lock
  *     Inode LRU list locks
  *
@@ -45,7 +45,7 @@
  *   inode->i_lock
  *
  * inode_hash_lock
- *   inode_sb_list_lock
+ *   inode->i_sb->s_inode_list_lock
  *   inode->i_lock
  *
  * iunique_lock
@@ -57,8 +57,6 @@ static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
-
 /*
  * Empty aops. Can be used for the cases where the user does not
  * define any of the address_space operations.
@@ -359,7 +357,7 @@ void inode_init_once(struct inode *inode)
 	memset(inode, 0, sizeof(*inode));
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_devices);
-	INIT_LIST_HEAD(&inode->i_wb_list);
+	INIT_LIST_HEAD(&inode->i_io_list);
 	INIT_LIST_HEAD(&inode->i_lru);
 	address_space_init_once(&inode->i_data);
 	i_size_ordered_init(inode);
@@ -426,18 +424,18 @@ static void inode_lru_list_del(struct inode *inode)
  */
 void inode_sb_list_add(struct inode *inode)
 {
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&inode->i_sb->s_inode_list_lock);
 	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&inode->i_sb->s_inode_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
 static inline void inode_sb_list_del(struct inode *inode)
 {
 	if (!list_empty(&inode->i_sb_list)) {
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&inode->i_sb->s_inode_list_lock);
 		list_del_init(&inode->i_sb_list);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&inode->i_sb->s_inode_list_lock);
 	}
 }
 
@@ -527,8 +525,8 @@ static void evict(struct inode *inode)
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(!list_empty(&inode->i_lru));
 
-	if (!list_empty(&inode->i_wb_list))
-		inode_wb_list_del(inode);
+	if (!list_empty(&inode->i_io_list))
+		inode_io_list_del(inode);
 
 	inode_sb_list_del(inode);
 
@@ -577,6 +575,7 @@ static void dispose_list(struct list_head *head)
 		list_del_init(&inode->i_lru);
 
 		evict(inode);
+		cond_resched();
 	}
 }
 
@@ -594,7 +593,8 @@ void evict_inodes(struct super_block *sb)
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_sb_list_lock);
+again:
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (atomic_read(&inode->i_count))
 			continue;
@@ -609,8 +609,20 @@ void evict_inodes(struct super_block *sb)
 		inode_lru_list_del(inode);
 		spin_unlock(&inode->i_lock);
 		list_add(&inode->i_lru, &dispose);
+
+		/*
+		 * We can have a ton of inodes to evict at unmount time given
+		 * enough memory, check to see if we need to go to sleep for a
+		 * bit so we don't livelock.
+		 */
+		if (need_resched()) {
+			spin_unlock(&sb->s_inode_list_lock);
+			cond_resched();
+			dispose_list(&dispose);
+			goto again;
+		}
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 
 	dispose_list(&dispose);
 }
@@ -631,7 +643,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -654,7 +666,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 		spin_unlock(&inode->i_lock);
 		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 
 	dispose_list(&dispose);
 
@@ -890,7 +902,7 @@ struct inode *new_inode(struct super_block *sb)
 {
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_sb_list_lock);
+	spin_lock_prefetch(&sb->s_inode_list_lock);
 
 	inode = new_inode_pseudo(sb);
 	if (inode)
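The evict_inodes() change above is an anti-livelock/anti-softlockup measure: with tens of millions of cached inodes at unmount, holding s_inode_list_lock for the whole scan stalls the CPU, so the loop now drops the lock whenever the scheduler wants it, yields, drains the private batch, and rescans. The same batch-then-yield shape, reduced to a runnable userspace sketch (names and the batch size are illustrative):

	#include <pthread.h>
	#include <sched.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct item { struct item *next; };

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct item *head;

	static void dispose(struct item *batch)
	{
		while (batch) {
			struct item *next = batch->next;
			free(batch);            /* the expensive per-item teardown */
			batch = next;
		}
	}

	static void evict_all(void)
	{
		struct item *batch;
		int moved;
	again:
		batch = NULL;
		moved = 0;
		pthread_mutex_lock(&list_lock);
		while (head) {
			struct item *it = head;
			head = it->next;
			it->next = batch;       /* unlink onto the private batch */
			batch = it;
			/* Periodically drop the lock, yield, drain the batch
			 * and rescan: the need_resched()/cond_resched() dance. */
			if (++moved == 1024) {
				pthread_mutex_unlock(&list_lock);
				sched_yield();
				dispose(batch);
				goto again;
			}
		}
		pthread_mutex_unlock(&list_lock);
		dispose(batch);
	}

	int main(void)
	{
		for (int i = 0; i < 100000; i++) {
			struct item *it = malloc(sizeof(*it));
			it->next = head;
			head = it;
		}
		evict_all();
		printf("done\n");
		return 0;
	}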
diff --git a/fs/internal.h b/fs/internal.h
index 4d5af583ab03..71859c4d0b41 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,14 +112,13 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
 /*
  * inode.c
  */
-extern spinlock_t inode_sb_list_lock;
 extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
 extern void inode_add_lru(struct inode *inode);
 
 /*
  * fs-writeback.c
  */
-extern void inode_wb_list_del(struct inode *inode);
+extern void inode_io_list_del(struct inode *inode);
 
 extern long get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
diff --git a/fs/namei.c b/fs/namei.c
index 1c2105ed20c5..29b927938b8c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -560,6 +560,24 @@ static int __nd_alloc_stack(struct nameidata *nd)
 	return 0;
 }
 
+/**
+ * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
+ * @path: nameidata to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct path *path)
+{
+	struct vfsmount *mnt = path->mnt;
+
+	/* Only bind mounts can have disconnected paths */
+	if (mnt->mnt_root == mnt->mnt_sb->s_root)
+		return true;
+
+	return is_subdir(path->dentry, mnt->mnt_root);
+}
+
 static inline int nd_alloc_stack(struct nameidata *nd)
 {
 	if (likely(nd->depth != EMBEDDED_LEVELS))
@@ -1296,6 +1314,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
 				return -ECHILD;
 			nd->path.dentry = parent;
 			nd->seq = seq;
+			if (unlikely(!path_connected(&nd->path)))
+				return -ENOENT;
 			break;
 		} else {
 			struct mount *mnt = real_mount(nd->path.mnt);
@@ -1396,7 +1416,7 @@ static void follow_mount(struct path *path)
 	}
 }
 
-static void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot(struct nameidata *nd)
 {
 	if (!nd->root.mnt)
 		set_root(nd);
@@ -1412,6 +1432,8 @@ static void follow_dotdot(struct nameidata *nd)
 			/* rare case of legitimate dget_parent()... */
 			nd->path.dentry = dget_parent(nd->path.dentry);
 			dput(old);
+			if (unlikely(!path_connected(&nd->path)))
+				return -ENOENT;
 			break;
 		}
 		if (!follow_up(&nd->path))
@@ -1419,6 +1441,7 @@ static void follow_dotdot(struct nameidata *nd)
 	}
 	follow_mount(&nd->path);
 	nd->inode = nd->path.dentry->d_inode;
+	return 0;
 }
 
 /*
@@ -1634,7 +1657,7 @@ static inline int handle_dots(struct nameidata *nd, int type)
 		if (nd->flags & LOOKUP_RCU) {
 			return follow_dotdot_rcu(nd);
 		} else
-			follow_dotdot(nd);
+			return follow_dotdot(nd);
 	}
 	return 0;
 }
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 474a3ce1b5e1..e785fd954c30 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -143,17 +143,17 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 /**
  * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
- * @list: list of inodes being unmounted (sb->s_inodes)
+ * @sb: superblock being unmounted.
  *
  * Called during unmount with no locks held, so needs to be safe against
- * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
+ * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
  */
-void fsnotify_unmount_inodes(struct list_head *list)
+void fsnotify_unmount_inodes(struct super_block *sb)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
 
-	spin_lock(&inode_sb_list_lock);
-	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) {
 		struct inode *need_iput_tmp;
 
 		/*
@@ -189,7 +189,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		spin_unlock(&inode->i_lock);
 
 		/* In case the dropping of a reference would nuke next_i. */
-		while (&next_i->i_sb_list != list) {
+		while (&next_i->i_sb_list != &sb->s_inodes) {
 			spin_lock(&next_i->i_lock);
 			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
 						atomic_read(&next_i->i_count)) {
@@ -204,12 +204,12 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		}
 
 		/*
-		 * We can safely drop inode_sb_list_lock here because either
+		 * We can safely drop s_inode_list_lock here because either
 		 * we actually hold references on both inode and next_i or
 		 * end of list. Also no new inodes will be added since the
 		 * umount has begun.
 		 */
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
 
 		if (need_iput_tmp)
 			iput(need_iput_tmp);
@@ -221,7 +221,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
 		iput(inode);
 
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 }
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fed66e2c9fe8..ef0d64b2a6d9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -928,7 +928,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 	int reserved = 0;
 #endif
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		spin_lock(&inode->i_lock);
 		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -939,7 +939,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
-		spin_unlock(&inode_sb_list_lock);
+		spin_unlock(&sb->s_inode_list_lock);
 
 #ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
@@ -951,15 +951,15 @@ static void add_dquot_ref(struct super_block *sb, int type)
 		/*
 		 * We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the
-		 * inode_sb_list_lock We cannot iput the inode now as we can be
+		 * s_inode_list_lock. We cannot iput the inode now as we can be
 		 * holding the last reference and we cannot iput it under
-		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * s_inode_list_lock. So we keep the reference and iput it
 		 * later.
 		 */
 		old_inode = inode;
-		spin_lock(&inode_sb_list_lock);
+		spin_lock(&sb->s_inode_list_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 	iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1028,7 +1028,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 	struct inode *inode;
 	int reserved = 0;
 
-	spin_lock(&inode_sb_list_lock);
+	spin_lock(&sb->s_inode_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		/*
 		 * We have to scan also I_NEW inodes because they can already
@@ -1044,7 +1044,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 		}
 		spin_unlock(&dq_data_lock);
 	}
-	spin_unlock(&inode_sb_list_lock);
+	spin_unlock(&sb->s_inode_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"
diff --git a/fs/super.c b/fs/super.c
index b61372354f2b..954aeb80e202 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	return total_objects;
 }
 
+static void destroy_super_work(struct work_struct *work)
+{
+	struct super_block *s = container_of(work, struct super_block,
+							destroy_work);
+	int i;
+
+	for (i = 0; i < SB_FREEZE_LEVELS; i++)
+		percpu_free_rwsem(&s->s_writers.rw_sem[i]);
+	kfree(s);
+}
+
+static void destroy_super_rcu(struct rcu_head *head)
+{
+	struct super_block *s = container_of(head, struct super_block, rcu);
+	INIT_WORK(&s->destroy_work, destroy_super_work);
+	schedule_work(&s->destroy_work);
+}
+
 /**
  * destroy_super - frees a superblock
  * @s: superblock to free
@@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink,
  */
 static void destroy_super(struct super_block *s)
 {
-	int i;
 	list_lru_destroy(&s->s_dentry_lru);
 	list_lru_destroy(&s->s_inode_lru);
-	for (i = 0; i < SB_FREEZE_LEVELS; i++)
-		percpu_counter_destroy(&s->s_writers.counter[i]);
 	security_sb_free(s);
 	WARN_ON(!list_empty(&s->s_mounts));
 	kfree(s->s_subtype);
 	kfree(s->s_options);
-	kfree_rcu(s, rcu);
+	call_rcu(&s->rcu, destroy_super_rcu);
 }
 
 /**
@@ -178,19 +193,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 		goto fail;
 
 	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-		if (percpu_counter_init(&s->s_writers.counter[i], 0,
-					GFP_KERNEL) < 0)
+		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
+					sb_writers_name[i],
+					&type->s_writers_key[i]))
 			goto fail;
-		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-				 &type->s_writers_key[i], 0);
 	}
-	init_waitqueue_head(&s->s_writers.wait);
 	init_waitqueue_head(&s->s_writers.wait_unfrozen);
 	s->s_bdi = &noop_backing_dev_info;
 	s->s_flags = flags;
 	INIT_HLIST_NODE(&s->s_instances);
 	INIT_HLIST_BL_HEAD(&s->s_anon);
+	mutex_init(&s->s_sync_lock);
 	INIT_LIST_HEAD(&s->s_inodes);
+	spin_lock_init(&s->s_inode_list_lock);
 
 	if (list_lru_init_memcg(&s->s_dentry_lru))
 		goto fail;
@@ -399,7 +414,7 @@ void generic_shutdown_super(struct super_block *sb)
 		sync_filesystem(sb);
 		sb->s_flags &= ~MS_ACTIVE;
 
-		fsnotify_unmount_inodes(&sb->s_inodes);
+		fsnotify_unmount_inodes(sb);
 
 		evict_inodes(sb);
 
@@ -1146,72 +1161,46 @@ out:
  */
 void __sb_end_write(struct super_block *sb, int level)
 {
-	percpu_counter_dec(&sb->s_writers.counter[level-1]);
-	/*
-	 * Make sure s_writers are updated before we wake up waiters in
-	 * freeze_super().
-	 */
-	smp_mb();
-	if (waitqueue_active(&sb->s_writers.wait))
-		wake_up(&sb->s_writers.wait);
-	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+	percpu_up_read(sb->s_writers.rw_sem + level-1);
 }
 EXPORT_SYMBOL(__sb_end_write);
 
-#ifdef CONFIG_LOCKDEP
-/*
- * We want lockdep to tell us about possible deadlocks with freezing but
- * it's a bit tricky to properly instrument it. Getting a freeze protection
- * works as getting a read lock but there are subtle problems. XFS for example
- * gets freeze protection on internal level twice in some cases, which is OK
- * only because we already hold a freeze protection also on higher level. Due
- * to these cases we have to tell lockdep we are doing trylock when we
- * already hold a freeze protection for a higher freeze level.
- */
-static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
-				unsigned long ip)
-{
-	int i;
-
-	if (!trylock) {
-		for (i = 0; i < level - 1; i++)
-			if (lock_is_held(&sb->s_writers.lock_map[i])) {
-				trylock = true;
-				break;
-			}
-	}
-	rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
-}
-#endif
-
 /*
  * This is an internal function, please use sb_start_{write,pagefault,intwrite}
  * instead.
  */
 int __sb_start_write(struct super_block *sb, int level, bool wait)
 {
-retry:
-	if (unlikely(sb->s_writers.frozen >= level)) {
-		if (!wait)
-			return 0;
-		wait_event(sb->s_writers.wait_unfrozen,
-			   sb->s_writers.frozen < level);
-	}
+	bool force_trylock = false;
+	int ret = 1;
 
 #ifdef CONFIG_LOCKDEP
-	acquire_freeze_lock(sb, level, !wait, _RET_IP_);
-#endif
-	percpu_counter_inc(&sb->s_writers.counter[level-1]);
 	/*
-	 * Make sure counter is updated before we check for frozen.
-	 * freeze_super() first sets frozen and then checks the counter.
+	 * We want lockdep to tell us about possible deadlocks with freezing
+	 * but it's a bit tricky to properly instrument it. Getting a freeze
+	 * protection works as getting a read lock but there are subtle
+	 * problems. XFS for example gets freeze protection on internal level
+	 * twice in some cases, which is OK only because we already hold a
+	 * freeze protection also on higher level. Due to these cases we have
+	 * to use wait == F (trylock mode) which must not fail.
 	 */
-	smp_mb();
-	if (unlikely(sb->s_writers.frozen >= level)) {
-		__sb_end_write(sb, level);
-		goto retry;
+	if (wait) {
+		int i;
+
+		for (i = 0; i < level - 1; i++)
+			if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
+				force_trylock = true;
+				break;
+			}
 	}
-	return 1;
+#endif
+	if (wait && !force_trylock)
+		percpu_down_read(sb->s_writers.rw_sem + level-1);
+	else
+		ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
+
+	WARN_ON(force_trylock & !ret);
+	return ret;
 }
 EXPORT_SYMBOL(__sb_start_write);
 
@@ -1221,37 +1210,33 @@ EXPORT_SYMBOL(__sb_start_write);
  * @level: type of writers we wait for (normal vs page fault)
  *
  * This function waits until there are no writers of given type to given file
- * system. Caller of this function should make sure there can be no new writers
- * of type @level before calling this function. Otherwise this function can
- * livelock.
+ * system.
  */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-	s64 writers;
-
+	percpu_down_write(sb->s_writers.rw_sem + level-1);
 	/*
-	 * We just cycle-through lockdep here so that it does not complain
-	 * about returning with lock to userspace
+	 * We are going to return to userspace and forget about this lock, the
+	 * ownership goes to the caller of thaw_super() which does unlock.
+	 *
+	 * FIXME: we should do this before return from freeze_super() after we
+	 * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
+	 * should re-acquire these locks before s_op->unfreeze_fs(sb). However
+	 * this leads to lockdep false-positives, so currently we do the early
+	 * release right after acquire.
	 */
-	rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
-	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
-
-	do {
-		DEFINE_WAIT(wait);
-
-		/*
-		 * We use a barrier in prepare_to_wait() to separate setting
-		 * of frozen and checking of the counter
-		 */
-		prepare_to_wait(&sb->s_writers.wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-
-		writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
-		if (writers)
-			schedule();
-
-		finish_wait(&sb->s_writers.wait, &wait);
-	} while (writers);
+	percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
+}
+
+static void sb_freeze_unlock(struct super_block *sb)
+{
+	int level;
+
+	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
+		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+
+	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+		percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
 /**
@@ -1310,20 +1295,14 @@ int freeze_super(struct super_block *sb)
 		return 0;
 	}
 
-	/* From now on, no new normal writers can start */
 	sb->s_writers.frozen = SB_FREEZE_WRITE;
-	smp_wmb();
-
 	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
 	up_write(&sb->s_umount);
-
 	sb_wait_write(sb, SB_FREEZE_WRITE);
+	down_write(&sb->s_umount);
 
 	/* Now we go and block page faults... */
-	down_write(&sb->s_umount);
 	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
-	smp_wmb();
-
 	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
 
 	/* All writers are done so after syncing there won't be dirty data */
@@ -1331,7 +1310,6 @@ int freeze_super(struct super_block *sb)
 
 	/* Now wait for internal filesystem counter */
 	sb->s_writers.frozen = SB_FREEZE_FS;
-	smp_wmb();
 	sb_wait_write(sb, SB_FREEZE_FS);
 
 	if (sb->s_op->freeze_fs) {
@@ -1340,7 +1318,7 @@ int freeze_super(struct super_block *sb)
 			printk(KERN_ERR
 				"VFS:Filesystem freeze failed\n");
 			sb->s_writers.frozen = SB_UNFROZEN;
-			smp_wmb();
+			sb_freeze_unlock(sb);
 			wake_up(&sb->s_writers.wait_unfrozen);
 			deactivate_locked_super(sb);
 			return ret;
@@ -1372,8 +1350,10 @@ int thaw_super(struct super_block *sb)
 		return -EINVAL;
 	}
 
-	if (sb->s_flags & MS_RDONLY)
+	if (sb->s_flags & MS_RDONLY) {
+		sb->s_writers.frozen = SB_UNFROZEN;
 		goto out;
+	}
 
 	if (sb->s_op->unfreeze_fs) {
 		error = sb->s_op->unfreeze_fs(sb);
@@ -1385,12 +1365,11 @@ int thaw_super(struct super_block *sb)
 		}
 	}
 
-out:
 	sb->s_writers.frozen = SB_UNFROZEN;
-	smp_wmb();
+	sb_freeze_unlock(sb);
+out:
 	wake_up(&sb->s_writers.wait_unfrozen);
 	deactivate_locked_super(sb);
-
 	return 0;
 }
 EXPORT_SYMBOL(thaw_super);
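The freeze_super()/thaw_super() paths reworked above are driven from userspace through the FIFREEZE/FITHAW ioctls (fsfreeze(8) is a thin wrapper around them). A small runnable example, assuming /mnt is a mounted, freezable filesystem you can afford to quiesce briefly:

	#include <fcntl.h>
	#include <linux/fs.h>   /* FIFREEZE, FITHAW */
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	int main(void)
	{
		/* /mnt is an assumption; any mountpoint works */
		int fd = open("/mnt", O_RDONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (ioctl(fd, FIFREEZE, 0) != 0) {   /* drives freeze_super() */
			perror("FIFREEZE (needs CAP_SYS_ADMIN)");
			return 1;
		}
		puts("filesystem frozen: all SB_FREEZE levels write-held");
		if (ioctl(fd, FITHAW, 0) != 0)       /* drives thaw_super() */
			perror("FITHAW");
		close(fd);
		return 0;
	}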
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index 4d0e02b022b3..392db25c0b56 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -5,5 +5,5 @@
 obj-$(CONFIG_UFS_FS) += ufs.o
 
 ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
-	    namei.o super.o symlink.o truncate.o util.o
+	    namei.o super.o symlink.o util.o
 ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a7106eda5024..fb8b54eb77c5 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	if (oldcount == 0) {
 		result = ufs_alloc_fragments (inode, cgno, goal, count, err);
 		if (result) {
+			write_seqlock(&UFS_I(inode)->meta_lock);
 			ufs_cpu_to_data_ptr(sb, p, result);
+			write_sequnlock(&UFS_I(inode)->meta_lock);
 			*err = 0;
 			UFS_I(inode)->i_lastfrag =
 				max(UFS_I(inode)->i_lastfrag, fragment + count);
@@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 		ufs_change_blocknr(inode, fragment - oldcount, oldcount,
 				   uspi->s_sbbase + tmp,
 				   uspi->s_sbbase + result, locked_page);
+		write_seqlock(&UFS_I(inode)->meta_lock);
 		ufs_cpu_to_data_ptr(sb, p, result);
+		write_sequnlock(&UFS_I(inode)->meta_lock);
 		*err = 0;
 		UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
 					       fragment + count);
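meta_lock is a seqlock: ufs_new_fragments() above wraps each block-pointer update in write_seqlock()/write_sequnlock(), and the readers added to fs/ufs/inode.c below (grow_chain32/grow_chain64) loop on read_seqbegin()/read_seqretry() until they observe a consistent snapshot. The reader/writer protocol, reduced to a runnable single-writer C11 sketch of a sequence lock (illustrative, not the kernel implementation):

	#include <stdatomic.h>
	#include <stdio.h>

	/* Sequence counter: odd while the (single) writer is mid-update.
	 * Readers retry if the counter was odd or changed across their
	 * critical section, so they never use a torn snapshot. */
	static _Atomic unsigned seq;
	static _Atomic int data_a, data_b;  /* the "block pointers" published */

	static void write_pair(int a, int b)
	{
		atomic_fetch_add_explicit(&seq, 1, memory_order_acq_rel); /* odd */
		atomic_store_explicit(&data_a, a, memory_order_relaxed);
		atomic_store_explicit(&data_b, b, memory_order_relaxed);
		atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even */
	}

	static void read_pair(int *a, int *b)
	{
		unsigned s;
		do {
			while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
				;       /* writer active: wait for an even count */
			*a = atomic_load_explicit(&data_a, memory_order_relaxed);
			*b = atomic_load_explicit(&data_b, memory_order_relaxed);
			atomic_thread_fence(memory_order_acquire);
		} while (atomic_load_explicit(&seq, memory_order_relaxed) != s);
	}

	int main(void)
	{
		int a, b;
		write_pair(1, 2);
		read_pair(&a, &b);
		printf("%d %d\n", a, b);
		return 0;
	}

Writers pay two atomic increments; readers pay nothing but a possible retry, which is why the scheme fits read-mostly metadata like UFS block pointers.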
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index f913a6924b23..a064cf44b143 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -41,9 +41,7 @@
 #include "swab.h"
 #include "util.h"
 
-static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
-
-static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
+static int ufs_block_to_path(struct inode *inode, sector_t i_block, unsigned offsets[4])
 {
 	struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
 	int ptrs = uspi->s_apb;
@@ -75,227 +73,232 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
 	return n;
 }
 
+typedef struct {
+	void	*p;
+	union {
+		__fs32	key32;
+		__fs64	key64;
+	};
+	struct buffer_head *bh;
+} Indirect;
+
+static inline int grow_chain32(struct ufs_inode_info *ufsi,
+			       struct buffer_head *bh, __fs32 *v,
+			       Indirect *from, Indirect *to)
+{
+	Indirect *p;
+	unsigned seq;
+	to->bh = bh;
+	do {
+		seq = read_seqbegin(&ufsi->meta_lock);
+		to->key32 = *(__fs32 *)(to->p = v);
+		for (p = from; p <= to && p->key32 == *(__fs32 *)p->p; p++)
+			;
+	} while (read_seqretry(&ufsi->meta_lock, seq));
+	return (p > to);
+}
+
+static inline int grow_chain64(struct ufs_inode_info *ufsi,
+			       struct buffer_head *bh, __fs64 *v,
+			       Indirect *from, Indirect *to)
+{
+	Indirect *p;
+	unsigned seq;
+	to->bh = bh;
+	do {
+		seq = read_seqbegin(&ufsi->meta_lock);
+		to->key64 = *(__fs64 *)(to->p = v);
+		for (p = from; p <= to && p->key64 == *(__fs64 *)p->p; p++)
+			;
+	} while (read_seqretry(&ufsi->meta_lock, seq));
+	return (p > to);
+}
+
 /*
  * Returns the location of the fragment from
  * the beginning of the filesystem.
  */
 
-static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
+static u64 ufs_frag_map(struct inode *inode, unsigned offsets[4], int depth)
 {
 	struct ufs_inode_info *ufsi = UFS_I(inode);
 	struct super_block *sb = inode->i_sb;
 	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
 	u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift;
 	int shift = uspi->s_apbshift-uspi->s_fpbshift;
-	sector_t offsets[4], *p;
-	int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets);
-	u64 ret = 0L;
-	__fs32 block;
-	__fs64 u2_block = 0L;
+	Indirect chain[4], *q = chain;
+	unsigned *p;
 	unsigned flags = UFS_SB(sb)->s_flags;
-	u64 temp = 0L;
+	u64 res = 0;
 
-	UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth);
 	UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",
 		uspi->s_fpbshift, uspi->s_apbmask,
 		(unsigned long long)mask);
 
 	if (depth == 0)
-		return 0;
+		goto no_block;
 
+again:
 	p = offsets;
 
-	if (needs_lock)
-		lock_ufs(sb);
 	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
 		goto ufs2;
 
-	block = ufsi->i_u1.i_data[*p++];
-	if (!block)
-		goto out;
+	if (!grow_chain32(ufsi, NULL, &ufsi->i_u1.i_data[*p++], chain, q))
+		goto changed;
+	if (!q->key32)
+		goto no_block;
 	while (--depth) {
+		__fs32 *ptr;
 		struct buffer_head *bh;
-		sector_t n = *p++;
+		unsigned n = *p++;
 
-		bh = sb_bread(sb, uspi->s_sbbase + fs32_to_cpu(sb, block)+(n>>shift));
+		bh = sb_bread(sb, uspi->s_sbbase +
+				  fs32_to_cpu(sb, q->key32) + (n>>shift));
 		if (!bh)
-			goto out;
-		block = ((__fs32 *) bh->b_data)[n & mask];
-		brelse (bh);
-		if (!block)
-			goto out;
-	}
-	ret = (u64) (uspi->s_sbbase + fs32_to_cpu(sb, block) + (frag & uspi->s_fpbmask));
-	goto out;
-ufs2:
-	u2_block = ufsi->i_u1.u2_i_data[*p++];
-	if (!u2_block)
-		goto out;
+			goto no_block;
+		ptr = (__fs32 *)bh->b_data + (n & mask);
+		if (!grow_chain32(ufsi, bh, ptr, chain, ++q))
+			goto changed;
+		if (!q->key32)
+			goto no_block;
+	}
+	res = fs32_to_cpu(sb, q->key32);
+	goto found;
 
+ufs2:
+	if (!grow_chain64(ufsi, NULL, &ufsi->i_u1.u2_i_data[*p++], chain, q))
+		goto changed;
+	if (!q->key64)
+		goto no_block;
 
 	while (--depth) {
+		__fs64 *ptr;
137 struct buffer_head *bh; 177 struct buffer_head *bh;
138 sector_t n = *p++; 178 unsigned n = *p++;
139
140 179
141 temp = (u64)(uspi->s_sbbase) + fs64_to_cpu(sb, u2_block); 180 bh = sb_bread(sb, uspi->s_sbbase +
142 bh = sb_bread(sb, temp +(u64) (n>>shift)); 181 fs64_to_cpu(sb, q->key64) + (n>>shift));
143 if (!bh) 182 if (!bh)
144 goto out; 183 goto no_block;
145 u2_block = ((__fs64 *)bh->b_data)[n & mask]; 184 ptr = (__fs64 *)bh->b_data + (n & mask);
146 brelse(bh); 185 if (!grow_chain64(ufsi, bh, ptr, chain, ++q))
147 if (!u2_block) 186 goto changed;
148 goto out; 187 if (!q->key64)
188 goto no_block;
149 } 189 }
150 temp = (u64)uspi->s_sbbase + fs64_to_cpu(sb, u2_block); 190 res = fs64_to_cpu(sb, q->key64);
151 ret = temp + (u64) (frag & uspi->s_fpbmask); 191found:
192 res += uspi->s_sbbase;
193no_block:
194 while (q > chain) {
195 brelse(q->bh);
196 q--;
197 }
198 return res;
152 199
153out: 200changed:
154 if (needs_lock) 201 while (q > chain) {
155 unlock_ufs(sb); 202 brelse(q->bh);
156 return ret; 203 q--;
204 }
205 goto again;
206}
207
208/*
209 * Unpacking tails: we have a file with a partial final block and
210 * we have been asked to extend it. If the fragment being written
211 * is within the same block, we need to extend the tail just to cover
212 * that fragment. Otherwise the tail is extended to a full block.
213 *
214 * Note that we might need to create a _new_ tail, but that will
215 * be handled elsewhere; this is strictly for resizing old
216 * ones.
217 */
218static bool
219ufs_extend_tail(struct inode *inode, u64 writes_to,
220 int *err, struct page *locked_page)
221{
222 struct ufs_inode_info *ufsi = UFS_I(inode);
223 struct super_block *sb = inode->i_sb;
224 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
225 unsigned lastfrag = ufsi->i_lastfrag; /* it's a short file, so unsigned is enough */
226 unsigned block = ufs_fragstoblks(lastfrag);
227 unsigned new_size;
228 void *p;
229 u64 tmp;
230
231 if (writes_to < (lastfrag | uspi->s_fpbmask))
232 new_size = (writes_to & uspi->s_fpbmask) + 1;
233 else
234 new_size = uspi->s_fpb;
235
236 p = ufs_get_direct_data_ptr(uspi, ufsi, block);
237 tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
238 new_size, err, locked_page);
239 return tmp != 0;
157} 240}
158 241
159/** 242/**
160 * ufs_inode_getfrag() - allocate new fragment(s) 243 * ufs_inode_getfrag() - allocate new fragment(s)
161 * @inode: pointer to inode 244 * @inode: pointer to inode
162 * @fragment: number of `fragment' which hold pointer 245 * @index: number of block pointer within the inode's array.
163 * to new allocated fragment(s)
164 * @new_fragment: number of new allocated fragment(s) 246 * @new_fragment: number of new allocated fragment(s)
165 * @required: how many fragment(s) we require
166 * @err: we set it if something goes wrong 247 * @err: we set it if something goes wrong
167 * @phys: pointer to where we save physical number of new allocated fragments,
168 * NULL if we allocate not data(indirect blocks for example).
169 * @new: we set it if we allocate new block 248 * @new: we set it if we allocate new block
170 * @locked_page: for ufs_new_fragments() 249 * @locked_page: for ufs_new_fragments()
171 */ 250 */
172static struct buffer_head * 251static u64
173ufs_inode_getfrag(struct inode *inode, u64 fragment, 252ufs_inode_getfrag(struct inode *inode, unsigned index,
174 sector_t new_fragment, unsigned int required, int *err, 253 sector_t new_fragment, int *err,
175 long *phys, int *new, struct page *locked_page) 254 int *new, struct page *locked_page)
176{ 255{
177 struct ufs_inode_info *ufsi = UFS_I(inode); 256 struct ufs_inode_info *ufsi = UFS_I(inode);
178 struct super_block *sb = inode->i_sb; 257 struct super_block *sb = inode->i_sb;
179 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 258 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
180 struct buffer_head * result; 259 u64 tmp, goal, lastfrag;
181 unsigned blockoff, lastblockoff; 260 unsigned nfrags = uspi->s_fpb;
182 u64 tmp, goal, lastfrag, block, lastblock; 261 void *p;
183 void *p, *p2;
184
185 UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, required %u, "
186 "metadata %d\n", inode->i_ino, (unsigned long long)fragment,
187 (unsigned long long)new_fragment, required, !phys);
188 262
189 /* TODO : to be done for write support 263 /* TODO : to be done for write support
190 if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 264 if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
191 goto ufs2; 265 goto ufs2;
192 */ 266 */
193 267
194 block = ufs_fragstoblks (fragment); 268 p = ufs_get_direct_data_ptr(uspi, ufsi, index);
195 blockoff = ufs_fragnum (fragment);
196 p = ufs_get_direct_data_ptr(uspi, ufsi, block);
197
198 goal = 0;
199
200repeat:
201 tmp = ufs_data_ptr_to_cpu(sb, p); 269 tmp = ufs_data_ptr_to_cpu(sb, p);
270 if (tmp)
271 goto out;
202 272
203 lastfrag = ufsi->i_lastfrag; 273 lastfrag = ufsi->i_lastfrag;
204 if (tmp && fragment < lastfrag) {
205 if (!phys) {
206 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
207 if (tmp == ufs_data_ptr_to_cpu(sb, p)) {
208 UFSD("EXIT, result %llu\n",
209 (unsigned long long)tmp + blockoff);
210 return result;
211 }
212 brelse (result);
213 goto repeat;
214 } else {
215 *phys = uspi->s_sbbase + tmp + blockoff;
216 return NULL;
217 }
218 }
219 274
220 lastblock = ufs_fragstoblks (lastfrag); 275 /* will that be a new tail? */
221 lastblockoff = ufs_fragnum (lastfrag); 276 if (new_fragment < UFS_NDIR_FRAGMENT && new_fragment >= lastfrag)
222 /* 277 nfrags = (new_fragment & uspi->s_fpbmask) + 1;
223 * We will extend file into new block beyond last allocated block 278
224 */ 279 goal = 0;
225 if (lastblock < block) { 280 if (index) {
226 /* 281 goal = ufs_data_ptr_to_cpu(sb,
227 * We must reallocate last allocated block 282 ufs_get_direct_data_ptr(uspi, ufsi, index - 1));
228 */ 283 if (goal)
229 if (lastblockoff) { 284 goal += uspi->s_fpb;
230 p2 = ufs_get_direct_data_ptr(uspi, ufsi, lastblock);
231 tmp = ufs_new_fragments(inode, p2, lastfrag,
232 ufs_data_ptr_to_cpu(sb, p2),
233 uspi->s_fpb - lastblockoff,
234 err, locked_page);
235 if (!tmp) {
236 if (lastfrag != ufsi->i_lastfrag)
237 goto repeat;
238 else
239 return NULL;
240 }
241 lastfrag = ufsi->i_lastfrag;
242
243 }
244 tmp = ufs_data_ptr_to_cpu(sb,
245 ufs_get_direct_data_ptr(uspi, ufsi,
246 lastblock));
247 if (tmp)
248 goal = tmp + uspi->s_fpb;
249 tmp = ufs_new_fragments (inode, p, fragment - blockoff,
250 goal, required + blockoff,
251 err,
252 phys != NULL ? locked_page : NULL);
253 } else if (lastblock == block) {
254 /*
255 * We will extend last allocated block
256 */
257 tmp = ufs_new_fragments(inode, p, fragment -
258 (blockoff - lastblockoff),
259 ufs_data_ptr_to_cpu(sb, p),
260 required + (blockoff - lastblockoff),
261 err, phys != NULL ? locked_page : NULL);
262 } else /* (lastblock > block) */ {
263 /*
264 * We will allocate new block before last allocated block
265 */
266 if (block) {
267 tmp = ufs_data_ptr_to_cpu(sb,
268 ufs_get_direct_data_ptr(uspi, ufsi, block - 1));
269 if (tmp)
270 goal = tmp + uspi->s_fpb;
271 }
272 tmp = ufs_new_fragments(inode, p, fragment - blockoff,
273 goal, uspi->s_fpb, err,
274 phys != NULL ? locked_page : NULL);
275 } 285 }
286 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
287 goal, uspi->s_fpb, err, locked_page);
288
276 if (!tmp) { 289 if (!tmp) {
277 if ((!blockoff && ufs_data_ptr_to_cpu(sb, p)) ||
278 (blockoff && lastfrag != ufsi->i_lastfrag))
279 goto repeat;
280 *err = -ENOSPC; 290 *err = -ENOSPC;
281 return NULL; 291 return 0;
282 } 292 }
283 293
284 if (!phys) { 294 if (new)
285 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
286 } else {
287 *phys = uspi->s_sbbase + tmp + blockoff;
288 result = NULL;
289 *err = 0;
290 *new = 1; 295 *new = 1;
291 }
292
293 inode->i_ctime = CURRENT_TIME_SEC; 296 inode->i_ctime = CURRENT_TIME_SEC;
294 if (IS_SYNC(inode)) 297 if (IS_SYNC(inode))
295 ufs_sync_inode (inode); 298 ufs_sync_inode (inode);
296 mark_inode_dirty(inode); 299 mark_inode_dirty(inode);
297 UFSD("EXIT, result %llu\n", (unsigned long long)tmp + blockoff); 300out:
298 return result; 301 return tmp + uspi->s_sbbase;
299 302
300 /* This part : To be implemented .... 303 /* This part : To be implemented ....
301 Required only for writing, not required for READ-ONLY. 304 Required only for writing, not required for READ-ONLY.
@@ -316,95 +319,70 @@ repeat2:
316/** 319/**
317 * ufs_inode_getblock() - allocate new block 320 * ufs_inode_getblock() - allocate new block
318 * @inode: pointer to inode 321 * @inode: pointer to inode
319 * @bh: pointer to block which hold "pointer" to new allocated block 322 * @ind_block: block number of the indirect block
320 * @fragment: number of `fragment' which hold pointer 323 * @index: number of pointer within the indirect block
321 * to new allocated block
322 * @new_fragment: number of new allocated fragment 324 * @new_fragment: number of new allocated fragment
323 * (block will hold this fragment and also uspi->s_fpb-1) 325 * (block will hold this fragment and also uspi->s_fpb-1)
324 * @err: see ufs_inode_getfrag() 326 * @err: see ufs_inode_getfrag()
325 * @phys: see ufs_inode_getfrag()
326 * @new: see ufs_inode_getfrag() 327 * @new: see ufs_inode_getfrag()
327 * @locked_page: see ufs_inode_getfrag() 328 * @locked_page: see ufs_inode_getfrag()
328 */ 329 */
329static struct buffer_head * 330static u64
330ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, 331ufs_inode_getblock(struct inode *inode, u64 ind_block,
331 u64 fragment, sector_t new_fragment, int *err, 332 unsigned index, sector_t new_fragment, int *err,
332 long *phys, int *new, struct page *locked_page) 333 int *new, struct page *locked_page)
333{ 334{
334 struct super_block *sb = inode->i_sb; 335 struct super_block *sb = inode->i_sb;
335 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 336 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
336 struct buffer_head * result; 337 int shift = uspi->s_apbshift - uspi->s_fpbshift;
337 unsigned blockoff; 338 u64 tmp = 0, goal;
338 u64 tmp, goal, block; 339 struct buffer_head *bh;
339 void *p; 340 void *p;
340 341
341 block = ufs_fragstoblks (fragment); 342 if (!ind_block)
342 blockoff = ufs_fragnum (fragment); 343 return 0;
343
344 UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, metadata %d\n",
345 inode->i_ino, (unsigned long long)fragment,
346 (unsigned long long)new_fragment, !phys);
347 344
348 result = NULL; 345 bh = sb_bread(sb, ind_block + (index >> shift));
349 if (!bh) 346 if (unlikely(!bh)) {
350 goto out; 347 *err = -EIO;
351 if (!buffer_uptodate(bh)) { 348 return 0;
352 ll_rw_block (READ, 1, &bh);
353 wait_on_buffer (bh);
354 if (!buffer_uptodate(bh))
355 goto out;
356 } 349 }
350
351 index &= uspi->s_apbmask >> uspi->s_fpbshift;
357 if (uspi->fs_magic == UFS2_MAGIC) 352 if (uspi->fs_magic == UFS2_MAGIC)
358 p = (__fs64 *)bh->b_data + block; 353 p = (__fs64 *)bh->b_data + index;
359 else 354 else
360 p = (__fs32 *)bh->b_data + block; 355 p = (__fs32 *)bh->b_data + index;
361repeat: 356
362 tmp = ufs_data_ptr_to_cpu(sb, p); 357 tmp = ufs_data_ptr_to_cpu(sb, p);
363 if (tmp) { 358 if (tmp)
364 if (!phys) { 359 goto out;
365 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
366 if (tmp == ufs_data_ptr_to_cpu(sb, p))
367 goto out;
368 brelse (result);
369 goto repeat;
370 } else {
371 *phys = uspi->s_sbbase + tmp + blockoff;
372 goto out;
373 }
374 }
375 360
376 if (block && (uspi->fs_magic == UFS2_MAGIC ? 361 if (index && (uspi->fs_magic == UFS2_MAGIC ?
377 (tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[block-1])) : 362 (tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[index-1])) :
378 (tmp = fs32_to_cpu(sb, ((__fs32 *)bh->b_data)[block-1])))) 363 (tmp = fs32_to_cpu(sb, ((__fs32 *)bh->b_data)[index-1]))))
379 goal = tmp + uspi->s_fpb; 364 goal = tmp + uspi->s_fpb;
380 else 365 else
381 goal = bh->b_blocknr + uspi->s_fpb; 366 goal = bh->b_blocknr + uspi->s_fpb;
382 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal, 367 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal,
383 uspi->s_fpb, err, locked_page); 368 uspi->s_fpb, err, locked_page);
384 if (!tmp) { 369 if (!tmp)
385 if (ufs_data_ptr_to_cpu(sb, p))
386 goto repeat;
387 goto out; 370 goto out;
388 }
389 371
390 372 if (new)
391 if (!phys) {
392 result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
393 } else {
394 *phys = uspi->s_sbbase + tmp + blockoff;
395 *new = 1; 373 *new = 1;
396 }
397 374
398 mark_buffer_dirty(bh); 375 mark_buffer_dirty(bh);
399 if (IS_SYNC(inode)) 376 if (IS_SYNC(inode))
400 sync_dirty_buffer(bh); 377 sync_dirty_buffer(bh);
401 inode->i_ctime = CURRENT_TIME_SEC; 378 inode->i_ctime = CURRENT_TIME_SEC;
402 mark_inode_dirty(inode); 379 mark_inode_dirty(inode);
403 UFSD("result %llu\n", (unsigned long long)tmp + blockoff);
404out: 380out:
405 brelse (bh); 381 brelse (bh);
406 UFSD("EXIT\n"); 382 UFSD("EXIT\n");
407 return result; 383 if (tmp)
384 tmp += uspi->s_sbbase;
385 return tmp;
408} 386}
409 387
410/** 388/**
@@ -412,103 +390,64 @@ out:
412 * readpage, writepage and so on 390 * readpage, writepage and so on
413 */ 391 */
414 392
415int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) 393static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
416{ 394{
417 struct super_block * sb = inode->i_sb; 395 struct super_block *sb = inode->i_sb;
418 struct ufs_sb_info * sbi = UFS_SB(sb); 396 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
419 struct ufs_sb_private_info * uspi = sbi->s_uspi; 397 int err = 0, new = 0;
420 struct buffer_head * bh; 398 unsigned offsets[4];
421 int ret, err, new; 399 int depth = ufs_block_to_path(inode, fragment >> uspi->s_fpbshift, offsets);
422 unsigned long ptr,phys;
423 u64 phys64 = 0; 400 u64 phys64 = 0;
424 bool needs_lock = (sbi->mutex_owner != current); 401 unsigned frag = fragment & uspi->s_fpbmask;
425 402
426 if (!create) { 403 if (!create) {
427 phys64 = ufs_frag_map(inode, fragment, needs_lock); 404 phys64 = ufs_frag_map(inode, offsets, depth);
428 UFSD("phys64 = %llu\n", (unsigned long long)phys64); 405 goto out;
429 if (phys64)
430 map_bh(bh_result, sb, phys64);
431 return 0;
432 } 406 }
433 407
434 /* This code is entered only while writing ....? */ 408 /* This code is entered only while writing ....? */
435 409
436 err = -EIO; 410 mutex_lock(&UFS_I(inode)->truncate_mutex);
437 new = 0;
438 ret = 0;
439 bh = NULL;
440
441 if (needs_lock)
442 lock_ufs(sb);
443 411
444 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); 412 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
445 if (fragment > 413 if (unlikely(!depth)) {
446 ((UFS_NDADDR + uspi->s_apb + uspi->s_2apb + uspi->s_3apb) 414 ufs_warning(sb, "ufs_get_block", "block > big");
447 << uspi->s_fpbshift)) 415 err = -EIO;
448 goto abort_too_big;
449
450 err = 0;
451 ptr = fragment;
452
453 /*
454 * ok, these macros clean the logic up a bit and make
455 * it much more readable:
456 */
457#define GET_INODE_DATABLOCK(x) \
458 ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new,\
459 bh_result->b_page)
460#define GET_INODE_PTR(x) \
461 ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL,\
462 bh_result->b_page)
463#define GET_INDIRECT_DATABLOCK(x) \
464 ufs_inode_getblock(inode, bh, x, fragment, \
465 &err, &phys, &new, bh_result->b_page)
466#define GET_INDIRECT_PTR(x) \
467 ufs_inode_getblock(inode, bh, x, fragment, \
468 &err, NULL, NULL, NULL)
469
470 if (ptr < UFS_NDIR_FRAGMENT) {
471 bh = GET_INODE_DATABLOCK(ptr);
472 goto out; 416 goto out;
473 } 417 }
474 ptr -= UFS_NDIR_FRAGMENT;
475 if (ptr < (1 << (uspi->s_apbshift + uspi->s_fpbshift))) {
476 bh = GET_INODE_PTR(UFS_IND_FRAGMENT + (ptr >> uspi->s_apbshift));
477 goto get_indirect;
478 }
479 ptr -= 1 << (uspi->s_apbshift + uspi->s_fpbshift);
480 if (ptr < (1 << (uspi->s_2apbshift + uspi->s_fpbshift))) {
481 bh = GET_INODE_PTR(UFS_DIND_FRAGMENT + (ptr >> uspi->s_2apbshift));
482 goto get_double;
483 }
484 ptr -= 1 << (uspi->s_2apbshift + uspi->s_fpbshift);
485 bh = GET_INODE_PTR(UFS_TIND_FRAGMENT + (ptr >> uspi->s_3apbshift));
486 bh = GET_INDIRECT_PTR((ptr >> uspi->s_2apbshift) & uspi->s_apbmask);
487get_double:
488 bh = GET_INDIRECT_PTR((ptr >> uspi->s_apbshift) & uspi->s_apbmask);
489get_indirect:
490 bh = GET_INDIRECT_DATABLOCK(ptr & uspi->s_apbmask);
491
492#undef GET_INODE_DATABLOCK
493#undef GET_INODE_PTR
494#undef GET_INDIRECT_DATABLOCK
495#undef GET_INDIRECT_PTR
496 418
497out: 419 if (UFS_I(inode)->i_lastfrag < UFS_NDIR_FRAGMENT) {
498 if (err) 420 unsigned lastfrag = UFS_I(inode)->i_lastfrag;
499 goto abort; 421 unsigned tailfrags = lastfrag & uspi->s_fpbmask;
500 if (new) 422 if (tailfrags && fragment >= lastfrag) {
501 set_buffer_new(bh_result); 423 if (!ufs_extend_tail(inode, fragment,
502 map_bh(bh_result, sb, phys); 424 &err, bh_result->b_page))
503abort: 425 goto out;
504 if (needs_lock) 426 }
505 unlock_ufs(sb); 427 }
506 428
429 if (depth == 1) {
430 phys64 = ufs_inode_getfrag(inode, offsets[0], fragment,
431 &err, &new, bh_result->b_page);
432 } else {
433 int i;
434 phys64 = ufs_inode_getfrag(inode, offsets[0], fragment,
435 &err, NULL, NULL);
436 for (i = 1; i < depth - 1; i++)
437 phys64 = ufs_inode_getblock(inode, phys64, offsets[i],
438 fragment, &err, NULL, NULL);
439 phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 1],
440 fragment, &err, &new, bh_result->b_page);
441 }
442out:
443 if (phys64) {
444 phys64 += frag;
445 map_bh(bh_result, sb, phys64);
446 if (new)
447 set_buffer_new(bh_result);
448 }
449 mutex_unlock(&UFS_I(inode)->truncate_mutex);
507 return err; 450 return err;
508
509abort_too_big:
510 ufs_warning(sb, "ufs_get_block", "block > big");
511 goto abort;
512} 451}
513 452
514static int ufs_writepage(struct page *page, struct writeback_control *wbc) 453static int ufs_writepage(struct page *page, struct writeback_control *wbc)
@@ -526,12 +465,16 @@ int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
526 return __block_write_begin(page, pos, len, ufs_getfrag_block); 465 return __block_write_begin(page, pos, len, ufs_getfrag_block);
527} 466}
528 467
468static void ufs_truncate_blocks(struct inode *);
469
529static void ufs_write_failed(struct address_space *mapping, loff_t to) 470static void ufs_write_failed(struct address_space *mapping, loff_t to)
530{ 471{
531 struct inode *inode = mapping->host; 472 struct inode *inode = mapping->host;
532 473
533 if (to > inode->i_size) 474 if (to > inode->i_size) {
534 truncate_pagecache(inode, inode->i_size); 475 truncate_pagecache(inode, inode->i_size);
476 ufs_truncate_blocks(inode);
477 }
535} 478}
536 479
537static int ufs_write_begin(struct file *file, struct address_space *mapping, 480static int ufs_write_begin(struct file *file, struct address_space *mapping,
@@ -548,6 +491,18 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping,
548 return ret; 491 return ret;
549} 492}
550 493
494static int ufs_write_end(struct file *file, struct address_space *mapping,
495 loff_t pos, unsigned len, unsigned copied,
496 struct page *page, void *fsdata)
497{
498 int ret;
499
500 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
501 if (ret < len)
502 ufs_write_failed(mapping, pos + len);
503 return ret;
504}
505
551static sector_t ufs_bmap(struct address_space *mapping, sector_t block) 506static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
552{ 507{
553 return generic_block_bmap(mapping,block,ufs_getfrag_block); 508 return generic_block_bmap(mapping,block,ufs_getfrag_block);
@@ -557,7 +512,7 @@ const struct address_space_operations ufs_aops = {
557 .readpage = ufs_readpage, 512 .readpage = ufs_readpage,
558 .writepage = ufs_writepage, 513 .writepage = ufs_writepage,
559 .write_begin = ufs_write_begin, 514 .write_begin = ufs_write_begin,
560 .write_end = generic_write_end, 515 .write_end = ufs_write_end,
561 .bmap = ufs_bmap 516 .bmap = ufs_bmap
562}; 517};
563 518
@@ -599,7 +554,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
599 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); 554 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
600 return -1; 555 return -1;
601 } 556 }
602 557
603 /* 558 /*
604 * Linux now has 32-bit uid and gid, so we can support EFT. 559 * Linux now has 32-bit uid and gid, so we can support EFT.
605 */ 560 */
@@ -619,7 +574,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
619 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow); 574 ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
620 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag); 575 ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
621 576
622 577
623 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) { 578 if (S_ISCHR(mode) || S_ISBLK(mode) || inode->i_blocks) {
624 memcpy(ufsi->i_u1.i_data, &ufs_inode->ui_u2.ui_addr, 579 memcpy(ufsi->i_u1.i_data, &ufs_inode->ui_u2.ui_addr,
625 sizeof(ufs_inode->ui_u2.ui_addr)); 580 sizeof(ufs_inode->ui_u2.ui_addr));
@@ -753,7 +708,7 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
753 708
754 ufs_set_inode_uid(sb, ufs_inode, i_uid_read(inode)); 709 ufs_set_inode_uid(sb, ufs_inode, i_uid_read(inode));
755 ufs_set_inode_gid(sb, ufs_inode, i_gid_read(inode)); 710 ufs_set_inode_gid(sb, ufs_inode, i_gid_read(inode));
756 711
757 ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); 712 ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
758 ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); 713 ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
759 ufs_inode->ui_atime.tv_usec = 0; 714 ufs_inode->ui_atime.tv_usec = 0;
@@ -855,23 +810,19 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
855 810
856 ufs1_update_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino)); 811 ufs1_update_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
857 } 812 }
858 813
859 mark_buffer_dirty(bh); 814 mark_buffer_dirty(bh);
860 if (do_sync) 815 if (do_sync)
861 sync_dirty_buffer(bh); 816 sync_dirty_buffer(bh);
862 brelse (bh); 817 brelse (bh);
863 818
864 UFSD("EXIT\n"); 819 UFSD("EXIT\n");
865 return 0; 820 return 0;
866} 821}
867 822
868int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) 823int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
869{ 824{
870 int ret; 825 return ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
871 lock_ufs(inode->i_sb);
872 ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
873 unlock_ufs(inode->i_sb);
874 return ret;
875} 826}
876 827
877int ufs_sync_inode (struct inode *inode) 828int ufs_sync_inode (struct inode *inode)
@@ -888,24 +839,389 @@ void ufs_evict_inode(struct inode * inode)
888 839
889 truncate_inode_pages_final(&inode->i_data); 840 truncate_inode_pages_final(&inode->i_data);
890 if (want_delete) { 841 if (want_delete) {
891 loff_t old_i_size;
892 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
893 lock_ufs(inode->i_sb);
894 mark_inode_dirty(inode);
895 ufs_update_inode(inode, IS_SYNC(inode));
896 old_i_size = inode->i_size;
897 inode->i_size = 0; 842 inode->i_size = 0;
898 if (inode->i_blocks && ufs_truncate(inode, old_i_size)) 843 if (inode->i_blocks)
899 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); 844 ufs_truncate_blocks(inode);
900 unlock_ufs(inode->i_sb);
901 } 845 }
902 846
903 invalidate_inode_buffers(inode); 847 invalidate_inode_buffers(inode);
904 clear_inode(inode); 848 clear_inode(inode);
905 849
906 if (want_delete) { 850 if (want_delete)
907 lock_ufs(inode->i_sb);
908 ufs_free_inode(inode); 851 ufs_free_inode(inode);
909 unlock_ufs(inode->i_sb); 852}
853
854struct to_free {
855 struct inode *inode;
856 u64 to;
857 unsigned count;
858};
859
860static inline void free_data(struct to_free *ctx, u64 from, unsigned count)
861{
862 if (ctx->count && ctx->to != from) {
863 ufs_free_blocks(ctx->inode, ctx->to - ctx->count, ctx->count);
864 ctx->count = 0;
865 }
866 ctx->count += count;
867 ctx->to = from + count;
868}
869
870#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
871#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
872
873static void ufs_trunc_direct(struct inode *inode)
874{
875 struct ufs_inode_info *ufsi = UFS_I(inode);
876 struct super_block * sb;
877 struct ufs_sb_private_info * uspi;
878 void *p;
879 u64 frag1, frag2, frag3, frag4, block1, block2;
880 struct to_free ctx = {.inode = inode};
881 unsigned i, tmp;
882
883 UFSD("ENTER: ino %lu\n", inode->i_ino);
884
885 sb = inode->i_sb;
886 uspi = UFS_SB(sb)->s_uspi;
887
888 frag1 = DIRECT_FRAGMENT;
889 frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag);
890 frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1);
891 frag3 = frag4 & ~uspi->s_fpbmask;
892 block1 = block2 = 0;
893 if (frag2 > frag3) {
894 frag2 = frag4;
895 frag3 = frag4 = 0;
896 } else if (frag2 < frag3) {
897 block1 = ufs_fragstoblks (frag2);
898 block2 = ufs_fragstoblks (frag3);
899 }
900
901 UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu,"
902 " frag3 %llu, frag4 %llu\n", inode->i_ino,
903 (unsigned long long)frag1, (unsigned long long)frag2,
904 (unsigned long long)block1, (unsigned long long)block2,
905 (unsigned long long)frag3, (unsigned long long)frag4);
906
907 if (frag1 >= frag2)
908 goto next1;
909
910 /*
911 * Free first free fragments
912 */
913 p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1));
914 tmp = ufs_data_ptr_to_cpu(sb, p);
915 if (!tmp )
916 ufs_panic (sb, "ufs_trunc_direct", "internal error");
917 frag2 -= frag1;
918 frag1 = ufs_fragnum (frag1);
919
920 ufs_free_fragments(inode, tmp + frag1, frag2);
921
922next1:
923 /*
924 * Free whole blocks
925 */
926 for (i = block1 ; i < block2; i++) {
927 p = ufs_get_direct_data_ptr(uspi, ufsi, i);
928 tmp = ufs_data_ptr_to_cpu(sb, p);
929 if (!tmp)
930 continue;
931 write_seqlock(&ufsi->meta_lock);
932 ufs_data_ptr_clear(uspi, p);
933 write_sequnlock(&ufsi->meta_lock);
934
935 free_data(&ctx, tmp, uspi->s_fpb);
936 }
937
938 free_data(&ctx, 0, 0);
939
940 if (frag3 >= frag4)
941 goto next3;
942
943 /*
944 * Free last free fragments
945 */
946 p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3));
947 tmp = ufs_data_ptr_to_cpu(sb, p);
948 if (!tmp )
949 ufs_panic(sb, "ufs_truncate_direct", "internal error");
950 frag4 = ufs_fragnum (frag4);
951 write_seqlock(&ufsi->meta_lock);
952 ufs_data_ptr_clear(uspi, p);
953 write_sequnlock(&ufsi->meta_lock);
954
955 ufs_free_fragments (inode, tmp, frag4);
956 next3:
957
958 UFSD("EXIT: ino %lu\n", inode->i_ino);
959}
960
961static void free_full_branch(struct inode *inode, u64 ind_block, int depth)
962{
963 struct super_block *sb = inode->i_sb;
964 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
965 struct ufs_buffer_head *ubh = ubh_bread(sb, ind_block, uspi->s_bsize);
966 unsigned i;
967
968 if (!ubh)
969 return;
970
971 if (--depth) {
972 for (i = 0; i < uspi->s_apb; i++) {
973 void *p = ubh_get_data_ptr(uspi, ubh, i);
974 u64 block = ufs_data_ptr_to_cpu(sb, p);
975 if (block)
976 free_full_branch(inode, block, depth);
977 }
978 } else {
979 struct to_free ctx = {.inode = inode};
980
981 for (i = 0; i < uspi->s_apb; i++) {
982 void *p = ubh_get_data_ptr(uspi, ubh, i);
983 u64 block = ufs_data_ptr_to_cpu(sb, p);
984 if (block)
985 free_data(&ctx, block, uspi->s_fpb);
986 }
987 free_data(&ctx, 0, 0);
988 }
989
990 ubh_bforget(ubh);
991 ufs_free_blocks(inode, ind_block, uspi->s_fpb);
992}
993
994static void free_branch_tail(struct inode *inode, unsigned from, struct ufs_buffer_head *ubh, int depth)
995{
996 struct super_block *sb = inode->i_sb;
997 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
998 unsigned i;
999
1000 if (--depth) {
1001 for (i = from; i < uspi->s_apb ; i++) {
1002 void *p = ubh_get_data_ptr(uspi, ubh, i);
1003 u64 block = ufs_data_ptr_to_cpu(sb, p);
1004 if (block) {
1005 write_seqlock(&UFS_I(inode)->meta_lock);
1006 ufs_data_ptr_clear(uspi, p);
1007 write_sequnlock(&UFS_I(inode)->meta_lock);
1008 ubh_mark_buffer_dirty(ubh);
1009 free_full_branch(inode, block, depth);
1010 }
1011 }
1012 } else {
1013 struct to_free ctx = {.inode = inode};
1014
1015 for (i = from; i < uspi->s_apb; i++) {
1016 void *p = ubh_get_data_ptr(uspi, ubh, i);
1017 u64 block = ufs_data_ptr_to_cpu(sb, p);
1018 if (block) {
1019 write_seqlock(&UFS_I(inode)->meta_lock);
1020 ufs_data_ptr_clear(uspi, p);
1021 write_sequnlock(&UFS_I(inode)->meta_lock);
1022 ubh_mark_buffer_dirty(ubh);
1023 free_data(&ctx, block, uspi->s_fpb);
1024 }
1025 }
1026 free_data(&ctx, 0, 0);
1027 }
1028 if (IS_SYNC(inode) && ubh_buffer_dirty(ubh))
1029 ubh_sync_block(ubh);
1030 ubh_brelse(ubh);
1031}
1032
1033static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
1034{
1035 int err = 0;
1036 struct super_block *sb = inode->i_sb;
1037 struct address_space *mapping = inode->i_mapping;
1038 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
1039 unsigned i, end;
1040 sector_t lastfrag;
1041 struct page *lastpage;
1042 struct buffer_head *bh;
1043 u64 phys64;
1044
1045 lastfrag = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
1046
1047 if (!lastfrag)
1048 goto out;
1049
1050 lastfrag--;
1051
1052 lastpage = ufs_get_locked_page(mapping, lastfrag >>
1053 (PAGE_CACHE_SHIFT - inode->i_blkbits));
1054 if (IS_ERR(lastpage)) {
1055 err = -EIO;
1056 goto out;
1057 }
1058
1059 end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1);
1060 bh = page_buffers(lastpage);
1061 for (i = 0; i < end; ++i)
1062 bh = bh->b_this_page;
1063
1064
1065 err = ufs_getfrag_block(inode, lastfrag, bh, 1);
1066
1067 if (unlikely(err))
1068 goto out_unlock;
1069
1070 if (buffer_new(bh)) {
1071 clear_buffer_new(bh);
1072 unmap_underlying_metadata(bh->b_bdev,
1073 bh->b_blocknr);
1074 /*
1075 * we do not zero out the fragment, because if it is
1076 * mapped to a hole, it already contains zeroes
1077 */
1078 set_buffer_uptodate(bh);
1079 mark_buffer_dirty(bh);
1080 set_page_dirty(lastpage);
1081 }
1082
1083 if (lastfrag >= UFS_IND_FRAGMENT) {
1084 end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1;
1085 phys64 = bh->b_blocknr + 1;
1086 for (i = 0; i < end; ++i) {
1087 bh = sb_getblk(sb, i + phys64);
1088 lock_buffer(bh);
1089 memset(bh->b_data, 0, sb->s_blocksize);
1090 set_buffer_uptodate(bh);
1091 mark_buffer_dirty(bh);
1092 unlock_buffer(bh);
1093 sync_dirty_buffer(bh);
1094 brelse(bh);
1095 }
1096 }
1097out_unlock:
1098 ufs_put_locked_page(lastpage);
1099out:
1100 return err;
1101}
1102
1103static void __ufs_truncate_blocks(struct inode *inode)
1104{
1105 struct ufs_inode_info *ufsi = UFS_I(inode);
1106 struct super_block *sb = inode->i_sb;
1107 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
1108 unsigned offsets[4];
1109 int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
1110 int depth2;
1111 unsigned i;
1112 struct ufs_buffer_head *ubh[3];
1113 void *p;
1114 u64 block;
1115
1116 if (!depth)
1117 return;
1118
1119 /* find the last non-zero in offsets[] */
1120 for (depth2 = depth - 1; depth2; depth2--)
1121 if (offsets[depth2])
1122 break;
1123
1124 mutex_lock(&ufsi->truncate_mutex);
1125 if (depth == 1) {
1126 ufs_trunc_direct(inode);
1127 offsets[0] = UFS_IND_BLOCK;
1128 } else {
1129 /* get the blocks that should be partially emptied */
1130 p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
1131 for (i = 0; i < depth2; i++) {
1132 offsets[i]++; /* next branch is fully freed */
1133 block = ufs_data_ptr_to_cpu(sb, p);
1134 if (!block)
1135 break;
1136 ubh[i] = ubh_bread(sb, block, uspi->s_bsize);
1137 if (!ubh[i]) {
1138 write_seqlock(&ufsi->meta_lock);
1139 ufs_data_ptr_clear(uspi, p);
1140 write_sequnlock(&ufsi->meta_lock);
1141 break;
1142 }
1143 p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
1144 }
1145 while (i--)
1146 free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
1147 }
1148 for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) {
1149 p = ufs_get_direct_data_ptr(uspi, ufsi, i);
1150 block = ufs_data_ptr_to_cpu(sb, p);
1151 if (block) {
1152 write_seqlock(&ufsi->meta_lock);
1153 ufs_data_ptr_clear(uspi, p);
1154 write_sequnlock(&ufsi->meta_lock);
1155 free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
1156 }
910 } 1157 }
1158 ufsi->i_lastfrag = DIRECT_FRAGMENT;
1159 mark_inode_dirty(inode);
1160 mutex_unlock(&ufsi->truncate_mutex);
1161}
1162
1163static int ufs_truncate(struct inode *inode, loff_t size)
1164{
1165 int err = 0;
1166
1167 UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n",
1168 inode->i_ino, (unsigned long long)size,
1169 (unsigned long long)i_size_read(inode));
1170
1171 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1172 S_ISLNK(inode->i_mode)))
1173 return -EINVAL;
1174 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1175 return -EPERM;
1176
1177 err = ufs_alloc_lastblock(inode, size);
1178
1179 if (err)
1180 goto out;
1181
1182 block_truncate_page(inode->i_mapping, size, ufs_getfrag_block);
1183
1184 truncate_setsize(inode, size);
1185
1186 __ufs_truncate_blocks(inode);
1187 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
1188 mark_inode_dirty(inode);
1189out:
1190 UFSD("EXIT: err %d\n", err);
1191 return err;
1192}
1193
1194void ufs_truncate_blocks(struct inode *inode)
1195{
1196 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1197 S_ISLNK(inode->i_mode)))
1198 return;
1199 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1200 return;
1201 __ufs_truncate_blocks(inode);
1202}
1203
1204int ufs_setattr(struct dentry *dentry, struct iattr *attr)
1205{
1206 struct inode *inode = d_inode(dentry);
1207 unsigned int ia_valid = attr->ia_valid;
1208 int error;
1209
1210 error = inode_change_ok(inode, attr);
1211 if (error)
1212 return error;
1213
1214 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
1215 error = ufs_truncate(inode, attr->ia_size);
1216 if (error)
1217 return error;
1218 }
1219
1220 setattr_copy(inode, attr);
1221 mark_inode_dirty(inode);
1222 return 0;
911} 1223}
1224
1225const struct inode_operations ufs_file_inode_operations = {
1226 .setattr = ufs_setattr,
1227};
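
The heart of this rewrite is grow_chain32()/grow_chain64() above: ufs_frag_map() now walks the indirect-pointer chain locklessly, recording each pointer it follows and revalidating all earlier links under a read_seqbegin()/read_seqretry() loop, restarting the whole walk (the "changed"/"again" labels) if any link moved under it. A userspace sketch of that revalidation idea, assuming C11 atomics; chain_step, meta_seq and the *_sketch helpers are illustrative stand-ins, and the plain pointer loads elide the annotated reads a real seqlock reader would use:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static atomic_uint meta_seq;	/* conceptually the same counter the writer bumps */

struct chain_step {
	const uint64_t *p;	/* where this link of the chain lives */
	uint64_t key;		/* the value we observed there */
};

static unsigned read_seqbegin_sketch(atomic_uint *s)
{
	unsigned seq;

	while ((seq = atomic_load(s)) & 1)
		;	/* writer in progress: wait for an even count */
	return seq;
}

static bool read_seqretry_sketch(atomic_uint *s, unsigned seq)
{
	return atomic_load(s) != seq;	/* true => the reader must retry */
}

/* Mirrors grow_chain32(): record the next link, then re-check that every
 * earlier link still holds the value that led us here. Returns false when
 * a link changed, telling the caller to restart the whole walk. */
static bool grow_chain(struct chain_step *from, struct chain_step *to,
		       const uint64_t *v)
{
	struct chain_step *q;
	unsigned seq;

	do {
		seq = read_seqbegin_sketch(&meta_seq);
		to->p = v;
		to->key = *v;
		for (q = from; q <= to && q->key == *q->p; q++)
			;	/* revalidate the chain built so far */
	} while (read_seqretry_sketch(&meta_seq, seq));
	return q > to;
}

This is also why ufs_getfrag_block() above can drop the old filesystem-wide lock: writers serialize on the new per-inode truncate_mutex, while lookups only pay a retry on the rare race.
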
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 250579a80d90..f6390eec02ca 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -94,22 +94,6 @@
94#include "swab.h" 94#include "swab.h"
95#include "util.h" 95#include "util.h"
96 96
97void lock_ufs(struct super_block *sb)
98{
99 struct ufs_sb_info *sbi = UFS_SB(sb);
100
101 mutex_lock(&sbi->mutex);
102 sbi->mutex_owner = current;
103}
104
105void unlock_ufs(struct super_block *sb)
106{
107 struct ufs_sb_info *sbi = UFS_SB(sb);
108
109 sbi->mutex_owner = NULL;
110 mutex_unlock(&sbi->mutex);
111}
112
113static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) 97static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
114{ 98{
115 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 99 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
@@ -694,7 +678,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
694 struct ufs_super_block_third * usb3; 678 struct ufs_super_block_third * usb3;
695 unsigned flags; 679 unsigned flags;
696 680
697 lock_ufs(sb);
698 mutex_lock(&UFS_SB(sb)->s_lock); 681 mutex_lock(&UFS_SB(sb)->s_lock);
699 682
700 UFSD("ENTER\n"); 683 UFSD("ENTER\n");
@@ -714,7 +697,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
714 697
715 UFSD("EXIT\n"); 698 UFSD("EXIT\n");
716 mutex_unlock(&UFS_SB(sb)->s_lock); 699 mutex_unlock(&UFS_SB(sb)->s_lock);
717 unlock_ufs(sb);
718 700
719 return 0; 701 return 0;
720} 702}
@@ -758,7 +740,6 @@ static void ufs_put_super(struct super_block *sb)
758 740
759 ubh_brelse_uspi (sbi->s_uspi); 741 ubh_brelse_uspi (sbi->s_uspi);
760 kfree (sbi->s_uspi); 742 kfree (sbi->s_uspi);
761 mutex_destroy(&sbi->mutex);
762 kfree (sbi); 743 kfree (sbi);
763 sb->s_fs_info = NULL; 744 sb->s_fs_info = NULL;
764 UFSD("EXIT\n"); 745 UFSD("EXIT\n");
@@ -801,7 +782,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
801 782
802 UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); 783 UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
803 784
804 mutex_init(&sbi->mutex);
805 mutex_init(&sbi->s_lock); 785 mutex_init(&sbi->s_lock);
806 spin_lock_init(&sbi->work_lock); 786 spin_lock_init(&sbi->work_lock);
807 INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); 787 INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
@@ -1257,7 +1237,6 @@ magic_found:
1257 return 0; 1237 return 0;
1258 1238
1259failed: 1239failed:
1260 mutex_destroy(&sbi->mutex);
1261 if (ubh) 1240 if (ubh)
1262 ubh_brelse_uspi (uspi); 1241 ubh_brelse_uspi (uspi);
1263 kfree (uspi); 1242 kfree (uspi);
@@ -1280,7 +1259,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1280 unsigned flags; 1259 unsigned flags;
1281 1260
1282 sync_filesystem(sb); 1261 sync_filesystem(sb);
1283 lock_ufs(sb);
1284 mutex_lock(&UFS_SB(sb)->s_lock); 1262 mutex_lock(&UFS_SB(sb)->s_lock);
1285 uspi = UFS_SB(sb)->s_uspi; 1263 uspi = UFS_SB(sb)->s_uspi;
1286 flags = UFS_SB(sb)->s_flags; 1264 flags = UFS_SB(sb)->s_flags;
@@ -1296,7 +1274,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1296 ufs_set_opt (new_mount_opt, ONERROR_LOCK); 1274 ufs_set_opt (new_mount_opt, ONERROR_LOCK);
1297 if (!ufs_parse_options (data, &new_mount_opt)) { 1275 if (!ufs_parse_options (data, &new_mount_opt)) {
1298 mutex_unlock(&UFS_SB(sb)->s_lock); 1276 mutex_unlock(&UFS_SB(sb)->s_lock);
1299 unlock_ufs(sb);
1300 return -EINVAL; 1277 return -EINVAL;
1301 } 1278 }
1302 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { 1279 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
@@ -1304,14 +1281,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1304 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { 1281 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
1305 pr_err("ufstype can't be changed during remount\n"); 1282 pr_err("ufstype can't be changed during remount\n");
1306 mutex_unlock(&UFS_SB(sb)->s_lock); 1283 mutex_unlock(&UFS_SB(sb)->s_lock);
1307 unlock_ufs(sb);
1308 return -EINVAL; 1284 return -EINVAL;
1309 } 1285 }
1310 1286
1311 if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { 1287 if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
1312 UFS_SB(sb)->s_mount_opt = new_mount_opt; 1288 UFS_SB(sb)->s_mount_opt = new_mount_opt;
1313 mutex_unlock(&UFS_SB(sb)->s_lock); 1289 mutex_unlock(&UFS_SB(sb)->s_lock);
1314 unlock_ufs(sb);
1315 return 0; 1290 return 0;
1316 } 1291 }
1317 1292
@@ -1335,7 +1310,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1335#ifndef CONFIG_UFS_FS_WRITE 1310#ifndef CONFIG_UFS_FS_WRITE
1336 pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n"); 1311 pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
1337 mutex_unlock(&UFS_SB(sb)->s_lock); 1312 mutex_unlock(&UFS_SB(sb)->s_lock);
1338 unlock_ufs(sb);
1339 return -EINVAL; 1313 return -EINVAL;
1340#else 1314#else
1341 if (ufstype != UFS_MOUNT_UFSTYPE_SUN && 1315 if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
@@ -1345,13 +1319,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1345 ufstype != UFS_MOUNT_UFSTYPE_UFS2) { 1319 ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
1346 pr_err("this ufstype is read-only supported\n"); 1320 pr_err("this ufstype is read-only supported\n");
1347 mutex_unlock(&UFS_SB(sb)->s_lock); 1321 mutex_unlock(&UFS_SB(sb)->s_lock);
1348 unlock_ufs(sb);
1349 return -EINVAL; 1322 return -EINVAL;
1350 } 1323 }
1351 if (!ufs_read_cylinder_structures(sb)) { 1324 if (!ufs_read_cylinder_structures(sb)) {
1352 pr_err("failed during remounting\n"); 1325 pr_err("failed during remounting\n");
1353 mutex_unlock(&UFS_SB(sb)->s_lock); 1326 mutex_unlock(&UFS_SB(sb)->s_lock);
1354 unlock_ufs(sb);
1355 return -EPERM; 1327 return -EPERM;
1356 } 1328 }
1357 sb->s_flags &= ~MS_RDONLY; 1329 sb->s_flags &= ~MS_RDONLY;
@@ -1359,7 +1331,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1359 } 1331 }
1360 UFS_SB(sb)->s_mount_opt = new_mount_opt; 1332 UFS_SB(sb)->s_mount_opt = new_mount_opt;
1361 mutex_unlock(&UFS_SB(sb)->s_lock); 1333 mutex_unlock(&UFS_SB(sb)->s_lock);
1362 unlock_ufs(sb);
1363 return 0; 1334 return 0;
1364} 1335}
1365 1336
@@ -1391,8 +1362,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1391 struct ufs_super_block_third *usb3; 1362 struct ufs_super_block_third *usb3;
1392 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 1363 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
1393 1364
1394 lock_ufs(sb); 1365 mutex_lock(&UFS_SB(sb)->s_lock);
1395
1396 usb3 = ubh_get_usb_third(uspi); 1366 usb3 = ubh_get_usb_third(uspi);
1397 1367
1398 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { 1368 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
@@ -1413,7 +1383,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1413 buf->f_fsid.val[0] = (u32)id; 1383 buf->f_fsid.val[0] = (u32)id;
1414 buf->f_fsid.val[1] = (u32)(id >> 32); 1384 buf->f_fsid.val[1] = (u32)(id >> 32);
1415 1385
1416 unlock_ufs(sb); 1386 mutex_unlock(&UFS_SB(sb)->s_lock);
1417 1387
1418 return 0; 1388 return 0;
1419} 1389}
@@ -1429,6 +1399,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
1429 return NULL; 1399 return NULL;
1430 1400
1431 ei->vfs_inode.i_version = 1; 1401 ei->vfs_inode.i_version = 1;
1402 seqlock_init(&ei->meta_lock);
1403 mutex_init(&ei->truncate_mutex);
1432 return &ei->vfs_inode; 1404 return &ei->vfs_inode;
1433} 1405}
1434 1406
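
With lock_ufs()/unlock_ufs() gone, the serialization this series relies on lives entirely in per-inode state, set up once per in-core inode in ufs_alloc_inode() above. A sketch of that shape, using POSIX threads and C11 atomics as stand-ins for the kernel's seqlock_t and struct mutex; inode_like and alloc_inode_like() are illustrative names:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct inode_like {
	atomic_uint meta_seq;            /* models seqlock_t meta_lock */
	pthread_mutex_t truncate_mutex;  /* models struct mutex truncate_mutex */
	/* ... block pointers, i_lastfrag, ... */
};

/* Mirrors the seqlock_init()/mutex_init() calls added to ufs_alloc_inode():
 * each inode carries its own locks, so operations on two different inodes
 * never contend the way they did under the filesystem-wide mutex. */
static struct inode_like *alloc_inode_like(void)
{
	struct inode_like *ei = calloc(1, sizeof(*ei));

	if (!ei)
		return NULL;
	atomic_init(&ei->meta_seq, 0);
	pthread_mutex_init(&ei->truncate_mutex, NULL);
	return ei;
}
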
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
deleted file mode 100644
index 21154704c168..000000000000
--- a/fs/ufs/truncate.c
+++ /dev/null
@@ -1,523 +0,0 @@
1/*
2 * linux/fs/ufs/truncate.c
3 *
4 * Copyright (C) 1998
5 * Daniel Pirkl <daniel.pirkl@email.cz>
6 * Charles University, Faculty of Mathematics and Physics
7 *
8 * from
9 *
10 * linux/fs/ext2/truncate.c
11 *
12 * Copyright (C) 1992, 1993, 1994, 1995
13 * Remy Card (card@masi.ibp.fr)
14 * Laboratoire MASI - Institut Blaise Pascal
15 * Universite Pierre et Marie Curie (Paris VI)
16 *
17 * from
18 *
19 * linux/fs/minix/truncate.c
20 *
21 * Copyright (C) 1991, 1992 Linus Torvalds
22 *
23 * Big-endian to little-endian byte-swapping/bitmaps by
24 * David S. Miller (davem@caip.rutgers.edu), 1995
25 */
26
27/*
28 * Real random numbers for secure rm added 94/02/18
29 * Idea from Pierre del Perugia <delperug@gla.ecoledoc.ibp.fr>
30 */
31
32/*
33 * Adaptation to use page cache and UFS2 write support by
34 * Evgeniy Dushistov <dushistov@mail.ru>, 2006-2007
35 */
36
37#include <linux/errno.h>
38#include <linux/fs.h>
39#include <linux/fcntl.h>
40#include <linux/time.h>
41#include <linux/stat.h>
42#include <linux/string.h>
43#include <linux/buffer_head.h>
44#include <linux/blkdev.h>
45#include <linux/sched.h>
46
47#include "ufs_fs.h"
48#include "ufs.h"
49#include "swab.h"
50#include "util.h"
51
52/*
53 * Secure deletion currently doesn't work. It interacts very badly
54 * with buffers shared with memory mappings, and for that reason
55 * can't be done in the truncate() routines. It should instead be
56 * done separately in "release()" before calling the truncate routines
57 * that will release the actual file blocks.
58 *
59 * Linus
60 */
61
62#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
63#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
64
65
66static int ufs_trunc_direct(struct inode *inode)
67{
68 struct ufs_inode_info *ufsi = UFS_I(inode);
69 struct super_block * sb;
70 struct ufs_sb_private_info * uspi;
71 void *p;
72 u64 frag1, frag2, frag3, frag4, block1, block2;
73 unsigned frag_to_free, free_count;
74 unsigned i, tmp;
75 int retry;
76
77 UFSD("ENTER: ino %lu\n", inode->i_ino);
78
79 sb = inode->i_sb;
80 uspi = UFS_SB(sb)->s_uspi;
81
82 frag_to_free = 0;
83 free_count = 0;
84 retry = 0;
85
86 frag1 = DIRECT_FRAGMENT;
87 frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag);
88 frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1);
89 frag3 = frag4 & ~uspi->s_fpbmask;
90 block1 = block2 = 0;
91 if (frag2 > frag3) {
92 frag2 = frag4;
93 frag3 = frag4 = 0;
94 } else if (frag2 < frag3) {
95 block1 = ufs_fragstoblks (frag2);
96 block2 = ufs_fragstoblks (frag3);
97 }
98
99 UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu,"
100 " frag3 %llu, frag4 %llu\n", inode->i_ino,
101 (unsigned long long)frag1, (unsigned long long)frag2,
102 (unsigned long long)block1, (unsigned long long)block2,
103 (unsigned long long)frag3, (unsigned long long)frag4);
104
105 if (frag1 >= frag2)
106 goto next1;
107
108 /*
109 * Free first free fragments
110 */
111 p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1));
112 tmp = ufs_data_ptr_to_cpu(sb, p);
113 if (!tmp )
114 ufs_panic (sb, "ufs_trunc_direct", "internal error");
115 frag2 -= frag1;
116 frag1 = ufs_fragnum (frag1);
117
118 ufs_free_fragments(inode, tmp + frag1, frag2);
119 mark_inode_dirty(inode);
120 frag_to_free = tmp + frag1;
121
122next1:
123 /*
124 * Free whole blocks
125 */
126 for (i = block1 ; i < block2; i++) {
127 p = ufs_get_direct_data_ptr(uspi, ufsi, i);
128 tmp = ufs_data_ptr_to_cpu(sb, p);
129 if (!tmp)
130 continue;
131 ufs_data_ptr_clear(uspi, p);
132
133 if (free_count == 0) {
134 frag_to_free = tmp;
135 free_count = uspi->s_fpb;
136 } else if (free_count > 0 && frag_to_free == tmp - free_count)
137 free_count += uspi->s_fpb;
138 else {
139 ufs_free_blocks (inode, frag_to_free, free_count);
140 frag_to_free = tmp;
141 free_count = uspi->s_fpb;
142 }
143 mark_inode_dirty(inode);
144 }
145
146 if (free_count > 0)
147 ufs_free_blocks (inode, frag_to_free, free_count);
148
149 if (frag3 >= frag4)
150 goto next3;
151
152 /*
153 * Free last free fragments
154 */
155 p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3));
156 tmp = ufs_data_ptr_to_cpu(sb, p);
157 if (!tmp )
158 ufs_panic(sb, "ufs_truncate_direct", "internal error");
159 frag4 = ufs_fragnum (frag4);
160 ufs_data_ptr_clear(uspi, p);
161
162 ufs_free_fragments (inode, tmp, frag4);
163 mark_inode_dirty(inode);
164 next3:
165
166 UFSD("EXIT: ino %lu\n", inode->i_ino);
167 return retry;
168}
169
170
171static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
172{
173 struct super_block * sb;
174 struct ufs_sb_private_info * uspi;
175 struct ufs_buffer_head * ind_ubh;
176 void *ind;
177 u64 tmp, indirect_block, i, frag_to_free;
178 unsigned free_count;
179 int retry;
180
181 UFSD("ENTER: ino %lu, offset %llu, p: %p\n",
182 inode->i_ino, (unsigned long long)offset, p);
183
184 BUG_ON(!p);
185
186 sb = inode->i_sb;
187 uspi = UFS_SB(sb)->s_uspi;
188
189 frag_to_free = 0;
190 free_count = 0;
191 retry = 0;
192
193 tmp = ufs_data_ptr_to_cpu(sb, p);
194 if (!tmp)
195 return 0;
196 ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize);
197 if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
198 ubh_brelse (ind_ubh);
199 return 1;
200 }
201 if (!ind_ubh) {
202 ufs_data_ptr_clear(uspi, p);
203 return 0;
204 }
205
206 indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0;
207 for (i = indirect_block; i < uspi->s_apb; i++) {
208 ind = ubh_get_data_ptr(uspi, ind_ubh, i);
209 tmp = ufs_data_ptr_to_cpu(sb, ind);
210 if (!tmp)
211 continue;
212
213 ufs_data_ptr_clear(uspi, ind);
214 ubh_mark_buffer_dirty(ind_ubh);
215 if (free_count == 0) {
216 frag_to_free = tmp;
217 free_count = uspi->s_fpb;
218 } else if (free_count > 0 && frag_to_free == tmp - free_count)
219 free_count += uspi->s_fpb;
220 else {
221 ufs_free_blocks (inode, frag_to_free, free_count);
222 frag_to_free = tmp;
223 free_count = uspi->s_fpb;
224 }
225
226 mark_inode_dirty(inode);
227 }
228
229 if (free_count > 0) {
230 ufs_free_blocks (inode, frag_to_free, free_count);
231 }
232 for (i = 0; i < uspi->s_apb; i++)
233 if (!ufs_is_data_ptr_zero(uspi,
234 ubh_get_data_ptr(uspi, ind_ubh, i)))
235 break;
236 if (i >= uspi->s_apb) {
237 tmp = ufs_data_ptr_to_cpu(sb, p);
238 ufs_data_ptr_clear(uspi, p);
239
240 ufs_free_blocks (inode, tmp, uspi->s_fpb);
241 mark_inode_dirty(inode);
242 ubh_bforget(ind_ubh);
243 ind_ubh = NULL;
244 }
245 if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh))
246 ubh_sync_block(ind_ubh);
247 ubh_brelse (ind_ubh);
248
249 UFSD("EXIT: ino %lu\n", inode->i_ino);
250
251 return retry;
252}
253
254static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
255{
256 struct super_block * sb;
257 struct ufs_sb_private_info * uspi;
258 struct ufs_buffer_head *dind_bh;
259 u64 i, tmp, dindirect_block;
260 void *dind;
261 int retry = 0;
262
263 UFSD("ENTER: ino %lu\n", inode->i_ino);
264
265 sb = inode->i_sb;
266 uspi = UFS_SB(sb)->s_uspi;
267
268 dindirect_block = (DIRECT_BLOCK > offset)
269 ? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0;
270 retry = 0;
271
272 tmp = ufs_data_ptr_to_cpu(sb, p);
273 if (!tmp)
274 return 0;
275 dind_bh = ubh_bread(sb, tmp, uspi->s_bsize);
276 if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
277 ubh_brelse (dind_bh);
278 return 1;
279 }
280 if (!dind_bh) {
281 ufs_data_ptr_clear(uspi, p);
282 return 0;
283 }
284
285 for (i = dindirect_block ; i < uspi->s_apb ; i++) {
286 dind = ubh_get_data_ptr(uspi, dind_bh, i);
287 tmp = ufs_data_ptr_to_cpu(sb, dind);
288 if (!tmp)
289 continue;
290 retry |= ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind);
291 ubh_mark_buffer_dirty(dind_bh);
292 }
293
294 for (i = 0; i < uspi->s_apb; i++)
295 if (!ufs_is_data_ptr_zero(uspi,
296 ubh_get_data_ptr(uspi, dind_bh, i)))
297 break;
298 if (i >= uspi->s_apb) {
299 tmp = ufs_data_ptr_to_cpu(sb, p);
300 ufs_data_ptr_clear(uspi, p);
301
302 ufs_free_blocks(inode, tmp, uspi->s_fpb);
303 mark_inode_dirty(inode);
304 ubh_bforget(dind_bh);
305 dind_bh = NULL;
306 }
307 if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh))
308 ubh_sync_block(dind_bh);
309 ubh_brelse (dind_bh);
310
311 UFSD("EXIT: ino %lu\n", inode->i_ino);
312
313 return retry;
314}
315
316static int ufs_trunc_tindirect(struct inode *inode)
317{
318 struct super_block *sb = inode->i_sb;
319 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
320 struct ufs_inode_info *ufsi = UFS_I(inode);
321 struct ufs_buffer_head * tind_bh;
322 u64 tindirect_block, tmp, i;
323 void *tind, *p;
324 int retry;
325
326 UFSD("ENTER: ino %lu\n", inode->i_ino);
327
328 retry = 0;
329
330 tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb))
331 ? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0;
332
333 p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK);
334 if (!(tmp = ufs_data_ptr_to_cpu(sb, p)))
335 return 0;
336 tind_bh = ubh_bread (sb, tmp, uspi->s_bsize);
337 if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
338 ubh_brelse (tind_bh);
339 return 1;
340 }
341 if (!tind_bh) {
342 ufs_data_ptr_clear(uspi, p);
343 return 0;
344 }
345
346 for (i = tindirect_block ; i < uspi->s_apb ; i++) {
347 tind = ubh_get_data_ptr(uspi, tind_bh, i);
348 retry |= ufs_trunc_dindirect(inode, UFS_NDADDR +
349 uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind);
350 ubh_mark_buffer_dirty(tind_bh);
351 }
352 for (i = 0; i < uspi->s_apb; i++)
353 if (!ufs_is_data_ptr_zero(uspi,
354 ubh_get_data_ptr(uspi, tind_bh, i)))
355 break;
356 if (i >= uspi->s_apb) {
357 tmp = ufs_data_ptr_to_cpu(sb, p);
358 ufs_data_ptr_clear(uspi, p);
359
360 ufs_free_blocks(inode, tmp, uspi->s_fpb);
361 mark_inode_dirty(inode);
362 ubh_bforget(tind_bh);
363 tind_bh = NULL;
364 }
365 if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh))
366 ubh_sync_block(tind_bh);
367 ubh_brelse (tind_bh);
368
369 UFSD("EXIT: ino %lu\n", inode->i_ino);
370 return retry;
371}
372
373static int ufs_alloc_lastblock(struct inode *inode)
374{
375 int err = 0;
376 struct super_block *sb = inode->i_sb;
377 struct address_space *mapping = inode->i_mapping;
378 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
379 unsigned i, end;
380 sector_t lastfrag;
381 struct page *lastpage;
382 struct buffer_head *bh;
383 u64 phys64;
384
385 lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
386
387 if (!lastfrag)
388 goto out;
389
390 lastfrag--;
391
392 lastpage = ufs_get_locked_page(mapping, lastfrag >>
393 (PAGE_CACHE_SHIFT - inode->i_blkbits));
394 if (IS_ERR(lastpage)) {
395 err = -EIO;
396 goto out;
397 }
398
399 end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1);
400 bh = page_buffers(lastpage);
401 for (i = 0; i < end; ++i)
402 bh = bh->b_this_page;
403
404
405 err = ufs_getfrag_block(inode, lastfrag, bh, 1);
406
407 if (unlikely(err))
408 goto out_unlock;
409
410 if (buffer_new(bh)) {
411 clear_buffer_new(bh);
412 unmap_underlying_metadata(bh->b_bdev,
413 bh->b_blocknr);
414 /*
415 * we do not zero out the fragment, because if it is
416 * mapped to a hole, it already contains zeroes
417 */
-		set_buffer_uptodate(bh);
-		mark_buffer_dirty(bh);
-		set_page_dirty(lastpage);
-	}
-
-	if (lastfrag >= UFS_IND_FRAGMENT) {
-		end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1;
-		phys64 = bh->b_blocknr + 1;
-		for (i = 0; i < end; ++i) {
-			bh = sb_getblk(sb, i + phys64);
-			lock_buffer(bh);
-			memset(bh->b_data, 0, sb->s_blocksize);
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-			unlock_buffer(bh);
-			sync_dirty_buffer(bh);
-			brelse(bh);
-		}
-	}
-out_unlock:
-	ufs_put_locked_page(lastpage);
-out:
-	return err;
-}
-
-int ufs_truncate(struct inode *inode, loff_t old_i_size)
-{
-	struct ufs_inode_info *ufsi = UFS_I(inode);
-	struct super_block *sb = inode->i_sb;
-	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
-	int retry, err = 0;
-
-	UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n",
-	     inode->i_ino, (unsigned long long)i_size_read(inode),
-	     (unsigned long long)old_i_size);
-
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-	      S_ISLNK(inode->i_mode)))
-		return -EINVAL;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return -EPERM;
-
-	err = ufs_alloc_lastblock(inode);
-
-	if (err) {
-		i_size_write(inode, old_i_size);
-		goto out;
-	}
-
-	block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
-
-	while (1) {
-		retry = ufs_trunc_direct(inode);
-		retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
-					    ufs_get_direct_data_ptr(uspi, ufsi,
-								    UFS_IND_BLOCK));
-		retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb,
-					     ufs_get_direct_data_ptr(uspi, ufsi,
-								     UFS_DIND_BLOCK));
-		retry |= ufs_trunc_tindirect (inode);
-		if (!retry)
-			break;
-		if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
-			ufs_sync_inode (inode);
-		yield();
-	}
-
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-	ufsi->i_lastfrag = DIRECT_FRAGMENT;
-	mark_inode_dirty(inode);
-out:
-	UFSD("EXIT: err %d\n", err);
-	return err;
-}
-
-int ufs_setattr(struct dentry *dentry, struct iattr *attr)
-{
-	struct inode *inode = d_inode(dentry);
-	unsigned int ia_valid = attr->ia_valid;
-	int error;
-
-	error = inode_change_ok(inode, attr);
-	if (error)
-		return error;
-
-	if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
-		loff_t old_i_size = inode->i_size;
-
-		/* XXX(truncate): truncate_setsize should be called last */
-		truncate_setsize(inode, attr->ia_size);
-
-		lock_ufs(inode->i_sb);
-		error = ufs_truncate(inode, old_i_size);
-		unlock_ufs(inode->i_sb);
-		if (error)
-			return error;
-	}
-
-	setattr_copy(inode, attr);
-	mark_inode_dirty(inode);
-	return 0;
-}
-
-const struct inode_operations ufs_file_inode_operations = {
-	.setattr = ufs_setattr,
-};
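
The boundary arithmetic in the deleted ufs_trunc_tindirect() above can be checked numerically. Below is a minimal user-space sketch, assuming classic UFS1 geometry (4096-byte blocks, 4-byte on-disk pointers, 12 direct pointers); all names and values here are illustrative, not part of the patch:

#include <stdio.h>

int main(void)
{
	unsigned long long ndaddr = 12;            /* UFS_NDADDR */
	unsigned long long apb = 4096 / 4;         /* uspi->s_apb: pointers per block */
	unsigned long long apb2 = apb * apb;       /* uspi->s_2apb: blocks under one dindirect */
	unsigned shift2 = 20;                      /* uspi->s_2apbshift: log2(apb2) */
	unsigned long long direct_block = 2000000; /* first block kept after truncation */

	/* Same expression as the tindirect_block computation above. */
	unsigned long long boundary = ndaddr + apb + apb2;
	unsigned long long slot = direct_block > boundary
		? (direct_block - boundary) >> shift2 : 0;

	printf("truncation starts at tindirect slot %llu\n", slot);
	return 0;
}
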
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 2e31ea2e35a3..7da4aca868c0 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -24,8 +24,6 @@ struct ufs_sb_info {
 	unsigned s_cgno[UFS_MAX_GROUP_LOADED];
 	unsigned short s_cg_loaded;
 	unsigned s_mount_opt;
-	struct mutex mutex;
-	struct task_struct *mutex_owner;
 	struct super_block *sb;
 	int work_queued; /* non-zero if the delayed work is queued */
 	struct delayed_work sync_work; /* FS sync delayed work */
@@ -46,6 +44,8 @@ struct ufs_inode_info {
 	__u32	i_oeftflag;
 	__u16	i_osync;
 	__u64	i_lastfrag;
+	seqlock_t meta_lock;
+	struct mutex	truncate_mutex;
 	__u32   i_dir_start_lookup;
 	struct inode vfs_inode;
 };
@@ -122,7 +122,7 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
 extern int ufs_write_inode (struct inode *, struct writeback_control *);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_evict_inode (struct inode *);
-extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
+extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
 
 /* namei.c */
 extern const struct file_operations ufs_dir_operations;
@@ -140,10 +140,6 @@ void ufs_mark_sb_dirty(struct super_block *sb);
 extern const struct inode_operations ufs_fast_symlink_inode_operations;
 extern const struct inode_operations ufs_symlink_inode_operations;
 
-/* truncate.c */
-extern int ufs_truncate (struct inode *, loff_t);
-extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
-
 static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
 {
 	return sb->s_fs_info;
@@ -170,7 +166,4 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b)
 	return do_div(b, uspi->s_fpg);
 }
 
-extern void lock_ufs(struct super_block *sb);
-extern void unlock_ufs(struct super_block *sb);
-
 #endif /* _UFS_UFS_H */
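
The two fields added to ufs_inode_info above replace the filesystem-wide lock_ufs()/unlock_ufs() serialisation that this header change deletes. A condensed sketch of the intended split (not the patch itself; example_get()/example_clear() are hypothetical accessors): truncate_mutex serialises truncation against block allocation, while meta_lock lets lockless readers of the block-pointer arrays detect a concurrent update and retry:

#include <linux/seqlock.h>
#include <linux/mutex.h>
#include "ufs.h"	/* ufs_inode_info as defined after this patch */

/* Reader side: sample a block pointer without blocking writers. */
static u64 example_read_ptr(struct ufs_inode_info *ufsi, void *p)
{
	unsigned seq;
	u64 val;

	do {
		seq = read_seqbegin(&ufsi->meta_lock);
		val = example_get(p);		/* hypothetical accessor */
	} while (read_seqretry(&ufsi->meta_lock, seq));
	return val;
}

/* Writer side: truncation clears a pointer under both locks. */
static void example_clear_ptr(struct ufs_inode_info *ufsi, void *p)
{
	mutex_lock(&ufsi->truncate_mutex);	/* excludes block allocation */
	write_seqlock(&ufsi->meta_lock);
	example_clear(p);			/* hypothetical accessor */
	write_sequnlock(&ufsi->meta_lock);
	mutex_unlock(&ufsi->truncate_mutex);
}
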
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index c77499bcbd7a..cc2a321f774b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -119,8 +119,7 @@ xfs_setfilesize_trans_alloc(
 	 * We may pass freeze protection with a transaction. So tell lockdep
 	 * we released it.
 	 */
-	rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
-		      1, _THIS_IP_);
+	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
 	/*
 	 * We hand off the transaction to the completion thread now, so
 	 * clear the flag here.
@@ -171,8 +170,7 @@ xfs_setfilesize_ioend(
 	 * Similarly for freeze protection.
 	 */
 	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
-	rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
-			   0, 1, _THIS_IP_);
+	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
 
 	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
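
These two xfs hunks are the motivating user of the freeze-protection handoff: the size-update transaction is queued to a workqueue, so the submitting context must tell lockdep it released SB_FREEZE_FS and the worker must re-acquire it. A condensed sketch of that handoff, using the helpers added in the include/linux/fs.h hunks below (the example_* names are illustrative, not xfs code):

#include <linux/fs.h>
#include <linux/workqueue.h>

/* Submitter: holds freeze protection with the transaction... */
static void example_submit(struct super_block *sb, struct workqueue_struct *wq,
			   struct work_struct *w)
{
	__sb_writers_release(sb, SB_FREEZE_FS);	/* lockdep: ownership leaves us */
	queue_work(wq, w);
}

/* Worker: ...and picks it back up before finishing the transaction. */
static void example_complete(struct super_block *sb)
{
	__sb_writers_acquired(sb, SB_FREEZE_FS);	/* lockdep: now ours */
	/* ...commit the transaction, dropping the protection for real... */
}
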
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dc634a55163b..b2f9b9c25e41 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_FS_H
 #define _LINUX_FS_H
 
-
 #include <linux/linkage.h>
 #include <linux/wait.h>
 #include <linux/kdev_t.h>
@@ -30,6 +29,8 @@
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/blk_types.h>
+#include <linux/workqueue.h>
+#include <linux/percpu-rwsem.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -636,7 +637,7 @@ struct inode {
 	unsigned long		dirtied_time_when;
 
 	struct hlist_node	i_hash;
-	struct list_head	i_wb_list;	/* backing dev IO list */
+	struct list_head	i_io_list;	/* backing dev IO list */
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */
 
@@ -1281,16 +1282,9 @@ enum {
 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
 
 struct sb_writers {
-	/* Counters for counting writers at each level */
-	struct percpu_counter	counter[SB_FREEZE_LEVELS];
-	wait_queue_head_t	wait;		/* queue for waiting for
-						   writers / faults to finish */
-	int			frozen;		/* Is sb frozen? */
-	wait_queue_head_t	wait_unfrozen;	/* queue for waiting for
-						   sb to be thawed */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	lock_map[SB_FREEZE_LEVELS];
-#endif
+	int				frozen;		/* Is sb frozen? */
+	wait_queue_head_t		wait_unfrozen;	/* for get_super_thawed() */
+	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
 };
 
 struct super_block {
@@ -1316,7 +1310,6 @@ struct super_block {
 #endif
 	const struct xattr_handler **s_xattr;
 
-	struct list_head	s_inodes;	/* all inodes */
 	struct hlist_bl_head	s_anon;		/* anonymous dentries for (nfs) exporting */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
 	struct block_device	*s_bdev;
@@ -1382,11 +1375,18 @@ struct super_block {
 	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
 	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
 	struct rcu_head		rcu;
+	struct work_struct	destroy_work;
+
+	struct mutex		s_sync_lock;	/* sync serialisation lock */
 
 	/*
 	 * Indicates how deep in a filesystem stack this SB is
 	 */
 	int s_stack_depth;
+
+	/* s_inode_list_lock protects s_inodes */
+	spinlock_t		s_inode_list_lock ____cacheline_aligned_in_smp;
+	struct list_head	s_inodes;	/* all inodes */
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
@@ -1398,6 +1398,11 @@ extern struct timespec current_fs_time(struct super_block *sb);
 void __sb_end_write(struct super_block *sb, int level);
 int __sb_start_write(struct super_block *sb, int level, bool wait);
 
+#define __sb_writers_acquired(sb, lev)	\
+	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+#define __sb_writers_release(sb, lev)	\
+	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
+
 /**
  * sb_end_write - drop write access to a superblock
  * @sb: the super we wrote to
@@ -2614,7 +2619,7 @@ static inline void insert_inode_hash(struct inode *inode)
 extern void __remove_inode_hash(struct inode *);
 static inline void remove_inode_hash(struct inode *inode)
 {
-	if (!inode_unhashed(inode))
+	if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
 		__remove_inode_hash(inode);
 }
 
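
The sb_writers rework above collapses the counter/waitqueue/lockdep-map triple into one percpu_rw_semaphore per freeze level: taking write access to a superblock is the read side, and freezing is the write side. A minimal sketch of what the fast path now amounts to (simplified from fs/super.c, not the exact code; example_* names are illustrative):

#include <linux/fs.h>
#include <linux/percpu-rwsem.h>

static inline void example_sb_start_write(struct super_block *sb)
{
	/* Blocks only while a freezer holds the write side. */
	percpu_down_read(&sb->s_writers.rw_sem[SB_FREEZE_WRITE - 1]);
}

static inline void example_sb_end_write(struct super_block *sb)
{
	percpu_up_read(&sb->s_writers.rw_sem[SB_FREEZE_WRITE - 1]);
}
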
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e0727d77feaf..533c4408529a 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -368,7 +368,7 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un
 extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
 extern void fsnotify_get_mark(struct fsnotify_mark *mark);
 extern void fsnotify_put_mark(struct fsnotify_mark *mark);
-extern void fsnotify_unmount_inodes(struct list_head *list);
+extern void fsnotify_unmount_inodes(struct super_block *sb);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern void fsnotify_init_event(struct fsnotify_event *event,
@@ -404,7 +404,7 @@ static inline u32 fsnotify_get_cookie(void)
 	return 0;
 }
 
-static inline void fsnotify_unmount_inodes(struct list_head *list)
+static inline void fsnotify_unmount_inodes(struct super_block *sb)
 {}
 
 #endif	/* CONFIG_FSNOTIFY */
diff --git a/include/linux/list.h b/include/linux/list.h
index feb773c76ee0..3e3e64a61002 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -672,6 +672,11 @@ static inline void hlist_add_fake(struct hlist_node *n)
 	n->pprev = &n->next;
 }
 
+static inline bool hlist_fake(struct hlist_node *h)
+{
+	return h->pprev == &h->next;
+}
+
 /*
  * Move a list from one list head to another. Fixup the pprev
  * reference of the first entry if it exists.
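
hlist_fake() is the read-side companion to the existing hlist_add_fake(): a fake-hashed node points at itself, so it can be recognised without taking any list lock. That is what the remove_inode_hash() change in include/linux/fs.h above relies on to skip the global inode hash lock for pseudo-hashed inodes. A small sketch (illustrative only):

#include <linux/list.h>

static void example_fake_hash(struct hlist_node *n)
{
	INIT_HLIST_NODE(n);
	hlist_add_fake(n);	/* n->pprev = &n->next: "hashed", but on no list */

	/* Detectable locklessly; no hash-bucket lock needed. */
	WARN_ON(!hlist_fake(n));
}
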
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 3e88c9a7d57f..834c4e52cb2d 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -16,6 +16,7 @@ struct percpu_rw_semaphore {
 };
 
 extern void percpu_down_read(struct percpu_rw_semaphore *);
+extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
 extern void percpu_up_read(struct percpu_rw_semaphore *);
 
 extern void percpu_down_write(struct percpu_rw_semaphore *);
@@ -31,4 +32,23 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
 })
 
+
+#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
+
+static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
+					bool read, unsigned long ip)
+{
+	lock_release(&sem->rw_sem.dep_map, 1, ip);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+	if (!read)
+		sem->rw_sem.owner = NULL;
+#endif
+}
+
+static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
+					bool read, unsigned long ip)
+{
+	lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
+}
+
 #endif
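
Note that percpu_rwsem_release() and percpu_rwsem_acquire() do not drop or take the semaphore itself; they only move the lockdep (and, for writers, the rwsem owner) bookkeeping between contexts while the lock stays held, generalising what rwsem_release()/rwsem_acquire_read() did for sb_writers. A sketch of a hypothetical handoff to a worker (example_* names are not part of the patch):

#include <linux/kernel.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>

static void example_handoff(struct percpu_rw_semaphore *sem,
			    struct workqueue_struct *wq, struct work_struct *w)
{
	percpu_down_write(sem);				/* stays held across the handoff */
	percpu_rwsem_release(sem, false, _THIS_IP_);	/* lockdep: we "gave it away" */
	queue_work(wq, w);
}

static void example_worker_side(struct percpu_rw_semaphore *sem)
{
	percpu_rwsem_acquire(sem, false, _THIS_IP_);	/* lockdep: now the owner */
	percpu_up_write(sem);
}
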
diff --git a/init/Kconfig b/init/Kconfig
index 2c0e50ef554a..9cabd866b34b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -927,7 +927,6 @@ config NUMA_BALANCING_DEFAULT_ENABLED
 menuconfig CGROUPS
 	bool "Control Group support"
 	select KERNFS
-	select PERCPU_RWSEM
 	help
 	  This option adds support for grouping sets of processes together, for
 	  use with process control subsystems such as Cpusets, CFS, memory
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 36942047ffc0..8e96f6cc2a4a 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
 
-obj-y += mutex.o semaphore.o rwsem.o
+obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
@@ -24,6 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
-obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
 obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 652a8ee8efe9..f32567254867 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -88,6 +88,19 @@ void percpu_down_read(struct percpu_rw_semaphore *brw)
 	__up_read(&brw->rw_sem);
 }
 
+int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
+{
+	if (unlikely(!update_fast_ctr(brw, +1))) {
+		if (!__down_read_trylock(&brw->rw_sem))
+			return 0;
+		atomic_inc(&brw->slow_read_ctr);
+		__up_read(&brw->rw_sem);
+	}
+
+	rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 1, _RET_IP_);
+	return 1;
+}
+
 void percpu_up_read(struct percpu_rw_semaphore *brw)
 {
 	rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
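
percpu_down_read_trylock() gives the freeze path a non-blocking entry: the fast path bumps the per-cpu counter, and only while a writer (the freezer) is active does it fall back to trying the slow-path rwsem. Its intended caller is __sb_start_write() with wait == false; roughly (simplified sketch, not the exact fs/super.c code):

#include <linux/fs.h>

/* Roughly what sb_start_write_trylock() amounts to after this series. */
static bool example_sb_start_write_trylock(struct super_block *sb, int level)
{
	return percpu_down_read_trylock(&sb->s_writers.rw_sem[level - 1]) != 0;
}
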
diff --git a/lib/Kconfig b/lib/Kconfig
index a16555281d53..8a49ff9d1502 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -53,9 +53,6 @@ config GENERIC_IO
 config STMP_DEVICE
 	bool
 
-config PERCPU_RWSEM
-	bool
-
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index dac5bf59309d..ee8d7fd07be3 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,13 +55,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 
 	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
 	spin_lock(&wb->list_lock);
-	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
+	list_for_each_entry(inode, &wb->b_dirty, i_io_list)
 		nr_dirty++;
-	list_for_each_entry(inode, &wb->b_io, i_wb_list)
+	list_for_each_entry(inode, &wb->b_io, i_io_list)
 		nr_io++;
-	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
+	list_for_each_entry(inode, &wb->b_more_io, i_io_list)
 		nr_more_io++;
-	list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
+	list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
 		if (inode->i_state & I_DIRTY_TIME)
 			nr_dirty_time++;
 	spin_unlock(&wb->list_lock);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 41e4581af7c5..aebc2dd6e649 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2143,11 +2143,10 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 	if (S_ISBLK(inode->i_mode)) {
 		p->bdev = bdgrab(I_BDEV(inode));
 		error = blkdev_get(p->bdev,
-				   FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				   sys_swapon);
+				   FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
 		if (error < 0) {
 			p->bdev = NULL;
-			return -EINVAL;
+			return error;
 		}
 		p->old_block_size = block_size(p->bdev);
 		error = set_blocksize(p->bdev, PAGE_SIZE);
@@ -2348,7 +2347,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	struct filename *name;
 	struct file *swap_file = NULL;
 	struct address_space *mapping;
-	int i;
 	int prio;
 	int error;
 	union swap_header *swap_header;
@@ -2388,19 +2386,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	p->swap_file = swap_file;
 	mapping = swap_file->f_mapping;
-
-	for (i = 0; i < nr_swapfiles; i++) {
-		struct swap_info_struct *q = swap_info[i];
-
-		if (q == p || !q->swap_file)
-			continue;
-		if (mapping == q->swap_file->f_mapping) {
-			error = -EBUSY;
-			goto bad_swap;
-		}
-	}
-
 	inode = mapping->host;
+
 	/* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
 	error = claim_swapfile(p, inode);
 	if (unlikely(error))
@@ -2433,6 +2420,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap;
 	}
 	if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+		int cpu;
+
 		p->flags |= SWP_SOLIDSTATE;
 		/*
 		 * select a random position to start with to help wear leveling
@@ -2451,9 +2440,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 			error = -ENOMEM;
 			goto bad_swap;
 		}
-		for_each_possible_cpu(i) {
+		for_each_possible_cpu(cpu) {
 			struct percpu_cluster *cluster;
-			cluster = per_cpu_ptr(p->percpu_cluster, i);
+			cluster = per_cpu_ptr(p->percpu_cluster, cpu);
 			cluster_set_null(&cluster->index);
 		}
 	}
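
The removed nr_swapfiles scan was the racy part of this fix: it inspected other swap entries' f_mapping before the new file was claimed, so two concurrent swapon(2) calls could both pass the check. For block devices the exclusive open now does that job: with the swap_info_struct as the FMODE_EXCL holder, a second swapon of the same device fails inside blkdev_get() with -EBUSY, which claim_swapfile() propagates instead of flattening it to -EINVAL (the regular-file side of the race is handled elsewhere in the same patch, outside these hunks). A condensed sketch of the block-device path, illustrative only:

#include <linux/blkdev.h>
#include <linux/swap.h>

static int example_claim_bdev(struct swap_info_struct *p,
			      struct block_device *bdev)
{
	/* p is the exclusive holder; a duplicate swapon gets -EBUSY here. */
	return blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
}
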