diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-05 23:34:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-09-05 23:34:28 -0400 |
commit | 7d9071a095023cd1db8fa18fa0d648dc1a5210e0 (patch) | |
tree | 072b462e43912b9dfc321136f3367114dcb8f2b3 /fs/fs-writeback.c | |
parent | bd779669945ed9982890da789ad32e3bd0d41f14 (diff) | |
parent | 397d425dc26da728396e66d392d5dcb8dac30c37 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro:
"In this one:
- d_move fixes (Eric Biederman)
- UFS fixes (me; locking is mostly sane now, a bunch of bugs in error
handling ought to be fixed)
- switch of sb_writers to percpu rwsem (Oleg Nesterov)
- superblock scalability (Josef Bacik and Dave Chinner)
- swapon(2) race fix (Hugh Dickins)"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (65 commits)
vfs: Test for and handle paths that are unreachable from their mnt_root
dcache: Reduce the scope of i_lock in d_splice_alias
dcache: Handle escaped paths in prepend_path
mm: fix potential data race in SyS_swapon
inode: don't softlockup when evicting inodes
inode: rename i_wb_list to i_io_list
sync: serialise per-superblock sync operations
inode: convert inode_sb_list_lock to per-sb
inode: add hlist_fake to avoid the inode hash lock in evict
writeback: plug writeback at a high level
change sb_writers to use percpu_rw_semaphore
shift percpu_counter_destroy() into destroy_super_work()
percpu-rwsem: kill CONFIG_PERCPU_RWSEM
percpu-rwsem: introduce percpu_rwsem_release() and percpu_rwsem_acquire()
percpu-rwsem: introduce percpu_down_read_trylock()
document rwsem_release() in sb_wait_write()
fix the broken lockdep logic in __sb_start_write()
introduce __sb_writers_{acquired,release}() helpers
ufs_inode_get{frag,block}(): get rid of 'phys' argument
ufs_getfrag_block(): tidy up a bit
...
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 72 |
1 files changed, 43 insertions, 29 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5fa588e933d5..ae0f438c2ee6 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -88,7 +88,7 @@ unsigned int dirtytime_expire_interval = 12 * 60 * 60; | |||
88 | 88 | ||
89 | static inline struct inode *wb_inode(struct list_head *head) | 89 | static inline struct inode *wb_inode(struct list_head *head) |
90 | { | 90 | { |
91 | return list_entry(head, struct inode, i_wb_list); | 91 | return list_entry(head, struct inode, i_io_list); |
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* |
@@ -125,22 +125,22 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb) | |||
125 | } | 125 | } |
126 | 126 | ||
127 | /** | 127 | /** |
128 | * inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list | 128 | * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list |
129 | * @inode: inode to be moved | 129 | * @inode: inode to be moved |
130 | * @wb: target bdi_writeback | 130 | * @wb: target bdi_writeback |
131 | * @head: one of @wb->b_{dirty|io|more_io} | 131 | * @head: one of @wb->b_{dirty|io|more_io} |
132 | * | 132 | * |
133 | * Move @inode->i_wb_list to @head of @wb and set %WB_has_dirty_io. | 133 | * Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io. |
134 | * Returns %true if @inode is the first occupant of the !dirty_time IO | 134 | * Returns %true if @inode is the first occupant of the !dirty_time IO |
135 | * lists; otherwise, %false. | 135 | * lists; otherwise, %false. |
136 | */ | 136 | */ |
137 | static bool inode_wb_list_move_locked(struct inode *inode, | 137 | static bool inode_io_list_move_locked(struct inode *inode, |
138 | struct bdi_writeback *wb, | 138 | struct bdi_writeback *wb, |
139 | struct list_head *head) | 139 | struct list_head *head) |
140 | { | 140 | { |
141 | assert_spin_locked(&wb->list_lock); | 141 | assert_spin_locked(&wb->list_lock); |
142 | 142 | ||
143 | list_move(&inode->i_wb_list, head); | 143 | list_move(&inode->i_io_list, head); |
144 | 144 | ||
145 | /* dirty_time doesn't count as dirty_io until expiration */ | 145 | /* dirty_time doesn't count as dirty_io until expiration */ |
146 | if (head != &wb->b_dirty_time) | 146 | if (head != &wb->b_dirty_time) |
@@ -151,19 +151,19 @@ static bool inode_wb_list_move_locked(struct inode *inode, | |||
151 | } | 151 | } |
152 | 152 | ||
153 | /** | 153 | /** |
154 | * inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list | 154 | * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list |
155 | * @inode: inode to be removed | 155 | * @inode: inode to be removed |
156 | * @wb: bdi_writeback @inode is being removed from | 156 | * @wb: bdi_writeback @inode is being removed from |
157 | * | 157 | * |
158 | * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and | 158 | * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and |
159 | * clear %WB_has_dirty_io if all are empty afterwards. | 159 | * clear %WB_has_dirty_io if all are empty afterwards. |
160 | */ | 160 | */ |
161 | static void inode_wb_list_del_locked(struct inode *inode, | 161 | static void inode_io_list_del_locked(struct inode *inode, |
162 | struct bdi_writeback *wb) | 162 | struct bdi_writeback *wb) |
163 | { | 163 | { |
164 | assert_spin_locked(&wb->list_lock); | 164 | assert_spin_locked(&wb->list_lock); |
165 | 165 | ||
166 | list_del_init(&inode->i_wb_list); | 166 | list_del_init(&inode->i_io_list); |
167 | wb_io_lists_depopulated(wb); | 167 | wb_io_lists_depopulated(wb); |
168 | } | 168 | } |
169 | 169 | ||
@@ -351,7 +351,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
351 | 351 | ||
352 | /* | 352 | /* |
353 | * Once I_FREEING is visible under i_lock, the eviction path owns | 353 | * Once I_FREEING is visible under i_lock, the eviction path owns |
354 | * the inode and we shouldn't modify ->i_wb_list. | 354 | * the inode and we shouldn't modify ->i_io_list. |
355 | */ | 355 | */ |
356 | if (unlikely(inode->i_state & I_FREEING)) | 356 | if (unlikely(inode->i_state & I_FREEING)) |
357 | goto skip_switch; | 357 | goto skip_switch; |
@@ -390,16 +390,16 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
390 | * is always correct including from ->b_dirty_time. The transfer | 390 | * is always correct including from ->b_dirty_time. The transfer |
391 | * preserves @inode->dirtied_when ordering. | 391 | * preserves @inode->dirtied_when ordering. |
392 | */ | 392 | */ |
393 | if (!list_empty(&inode->i_wb_list)) { | 393 | if (!list_empty(&inode->i_io_list)) { |
394 | struct inode *pos; | 394 | struct inode *pos; |
395 | 395 | ||
396 | inode_wb_list_del_locked(inode, old_wb); | 396 | inode_io_list_del_locked(inode, old_wb); |
397 | inode->i_wb = new_wb; | 397 | inode->i_wb = new_wb; |
398 | list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list) | 398 | list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) |
399 | if (time_after_eq(inode->dirtied_when, | 399 | if (time_after_eq(inode->dirtied_when, |
400 | pos->dirtied_when)) | 400 | pos->dirtied_when)) |
401 | break; | 401 | break; |
402 | inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev); | 402 | inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev); |
403 | } else { | 403 | } else { |
404 | inode->i_wb = new_wb; | 404 | inode->i_wb = new_wb; |
405 | } | 405 | } |
@@ -961,12 +961,12 @@ void wb_start_background_writeback(struct bdi_writeback *wb) | |||
961 | /* | 961 | /* |
962 | * Remove the inode from the writeback list it is on. | 962 | * Remove the inode from the writeback list it is on. |
963 | */ | 963 | */ |
964 | void inode_wb_list_del(struct inode *inode) | 964 | void inode_io_list_del(struct inode *inode) |
965 | { | 965 | { |
966 | struct bdi_writeback *wb; | 966 | struct bdi_writeback *wb; |
967 | 967 | ||
968 | wb = inode_to_wb_and_lock_list(inode); | 968 | wb = inode_to_wb_and_lock_list(inode); |
969 | inode_wb_list_del_locked(inode, wb); | 969 | inode_io_list_del_locked(inode, wb); |
970 | spin_unlock(&wb->list_lock); | 970 | spin_unlock(&wb->list_lock); |
971 | } | 971 | } |
972 | 972 | ||
@@ -988,7 +988,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) | |||
988 | if (time_before(inode->dirtied_when, tail->dirtied_when)) | 988 | if (time_before(inode->dirtied_when, tail->dirtied_when)) |
989 | inode->dirtied_when = jiffies; | 989 | inode->dirtied_when = jiffies; |
990 | } | 990 | } |
991 | inode_wb_list_move_locked(inode, wb, &wb->b_dirty); | 991 | inode_io_list_move_locked(inode, wb, &wb->b_dirty); |
992 | } | 992 | } |
993 | 993 | ||
994 | /* | 994 | /* |
@@ -996,7 +996,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) | |||
996 | */ | 996 | */ |
997 | static void requeue_io(struct inode *inode, struct bdi_writeback *wb) | 997 | static void requeue_io(struct inode *inode, struct bdi_writeback *wb) |
998 | { | 998 | { |
999 | inode_wb_list_move_locked(inode, wb, &wb->b_more_io); | 999 | inode_io_list_move_locked(inode, wb, &wb->b_more_io); |
1000 | } | 1000 | } |
1001 | 1001 | ||
1002 | static void inode_sync_complete(struct inode *inode) | 1002 | static void inode_sync_complete(struct inode *inode) |
@@ -1055,7 +1055,7 @@ static int move_expired_inodes(struct list_head *delaying_queue, | |||
1055 | if (older_than_this && | 1055 | if (older_than_this && |
1056 | inode_dirtied_after(inode, *older_than_this)) | 1056 | inode_dirtied_after(inode, *older_than_this)) |
1057 | break; | 1057 | break; |
1058 | list_move(&inode->i_wb_list, &tmp); | 1058 | list_move(&inode->i_io_list, &tmp); |
1059 | moved++; | 1059 | moved++; |
1060 | if (flags & EXPIRE_DIRTY_ATIME) | 1060 | if (flags & EXPIRE_DIRTY_ATIME) |
1061 | set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); | 1061 | set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); |
@@ -1078,7 +1078,7 @@ static int move_expired_inodes(struct list_head *delaying_queue, | |||
1078 | list_for_each_prev_safe(pos, node, &tmp) { | 1078 | list_for_each_prev_safe(pos, node, &tmp) { |
1079 | inode = wb_inode(pos); | 1079 | inode = wb_inode(pos); |
1080 | if (inode->i_sb == sb) | 1080 | if (inode->i_sb == sb) |
1081 | list_move(&inode->i_wb_list, dispatch_queue); | 1081 | list_move(&inode->i_io_list, dispatch_queue); |
1082 | } | 1082 | } |
1083 | } | 1083 | } |
1084 | out: | 1084 | out: |
@@ -1232,10 +1232,10 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, | |||
1232 | redirty_tail(inode, wb); | 1232 | redirty_tail(inode, wb); |
1233 | } else if (inode->i_state & I_DIRTY_TIME) { | 1233 | } else if (inode->i_state & I_DIRTY_TIME) { |
1234 | inode->dirtied_when = jiffies; | 1234 | inode->dirtied_when = jiffies; |
1235 | inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time); | 1235 | inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); |
1236 | } else { | 1236 | } else { |
1237 | /* The inode is clean. Remove from writeback lists. */ | 1237 | /* The inode is clean. Remove from writeback lists. */ |
1238 | inode_wb_list_del_locked(inode, wb); | 1238 | inode_io_list_del_locked(inode, wb); |
1239 | } | 1239 | } |
1240 | } | 1240 | } |
1241 | 1241 | ||
@@ -1378,7 +1378,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, | |||
1378 | * touch it. See comment above for explanation. | 1378 | * touch it. See comment above for explanation. |
1379 | */ | 1379 | */ |
1380 | if (!(inode->i_state & I_DIRTY_ALL)) | 1380 | if (!(inode->i_state & I_DIRTY_ALL)) |
1381 | inode_wb_list_del_locked(inode, wb); | 1381 | inode_io_list_del_locked(inode, wb); |
1382 | spin_unlock(&wb->list_lock); | 1382 | spin_unlock(&wb->list_lock); |
1383 | inode_sync_complete(inode); | 1383 | inode_sync_complete(inode); |
1384 | out: | 1384 | out: |
@@ -1439,7 +1439,9 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
1439 | unsigned long start_time = jiffies; | 1439 | unsigned long start_time = jiffies; |
1440 | long write_chunk; | 1440 | long write_chunk; |
1441 | long wrote = 0; /* count both pages and inodes */ | 1441 | long wrote = 0; /* count both pages and inodes */ |
1442 | struct blk_plug plug; | ||
1442 | 1443 | ||
1444 | blk_start_plug(&plug); | ||
1443 | while (!list_empty(&wb->b_io)) { | 1445 | while (!list_empty(&wb->b_io)) { |
1444 | struct inode *inode = wb_inode(wb->b_io.prev); | 1446 | struct inode *inode = wb_inode(wb->b_io.prev); |
1445 | 1447 | ||
@@ -1537,6 +1539,7 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
1537 | break; | 1539 | break; |
1538 | } | 1540 | } |
1539 | } | 1541 | } |
1542 | blk_finish_plug(&plug); | ||
1540 | return wrote; | 1543 | return wrote; |
1541 | } | 1544 | } |
1542 | 1545 | ||
@@ -2088,7 +2091,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
2088 | else | 2091 | else |
2089 | dirty_list = &wb->b_dirty_time; | 2092 | dirty_list = &wb->b_dirty_time; |
2090 | 2093 | ||
2091 | wakeup_bdi = inode_wb_list_move_locked(inode, wb, | 2094 | wakeup_bdi = inode_io_list_move_locked(inode, wb, |
2092 | dirty_list); | 2095 | dirty_list); |
2093 | 2096 | ||
2094 | spin_unlock(&wb->list_lock); | 2097 | spin_unlock(&wb->list_lock); |
@@ -2111,6 +2114,15 @@ out_unlock_inode: | |||
2111 | } | 2114 | } |
2112 | EXPORT_SYMBOL(__mark_inode_dirty); | 2115 | EXPORT_SYMBOL(__mark_inode_dirty); |
2113 | 2116 | ||
2117 | /* | ||
2118 | * The @s_sync_lock is used to serialise concurrent sync operations | ||
2119 | * to avoid lock contention problems with concurrent wait_sb_inodes() calls. | ||
2120 | * Concurrent callers will block on the s_sync_lock rather than doing contending | ||
2121 | * walks. The queueing maintains sync(2) required behaviour as all the IO that | ||
2122 | * has been issued up to the time this function is entered is guaranteed to be | ||
2123 | * completed by the time we have gained the lock and waited for all IO that is | ||
2124 | * in progress regardless of the order callers are granted the lock. | ||
2125 | */ | ||
2114 | static void wait_sb_inodes(struct super_block *sb) | 2126 | static void wait_sb_inodes(struct super_block *sb) |
2115 | { | 2127 | { |
2116 | struct inode *inode, *old_inode = NULL; | 2128 | struct inode *inode, *old_inode = NULL; |
@@ -2121,7 +2133,8 @@ static void wait_sb_inodes(struct super_block *sb) | |||
2121 | */ | 2133 | */ |
2122 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 2134 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
2123 | 2135 | ||
2124 | spin_lock(&inode_sb_list_lock); | 2136 | mutex_lock(&sb->s_sync_lock); |
2137 | spin_lock(&sb->s_inode_list_lock); | ||
2125 | 2138 | ||
2126 | /* | 2139 | /* |
2127 | * Data integrity sync. Must wait for all pages under writeback, | 2140 | * Data integrity sync. Must wait for all pages under writeback, |
@@ -2141,14 +2154,14 @@ static void wait_sb_inodes(struct super_block *sb) | |||
2141 | } | 2154 | } |
2142 | __iget(inode); | 2155 | __iget(inode); |
2143 | spin_unlock(&inode->i_lock); | 2156 | spin_unlock(&inode->i_lock); |
2144 | spin_unlock(&inode_sb_list_lock); | 2157 | spin_unlock(&sb->s_inode_list_lock); |
2145 | 2158 | ||
2146 | /* | 2159 | /* |
2147 | * We hold a reference to 'inode' so it couldn't have been | 2160 | * We hold a reference to 'inode' so it couldn't have been |
2148 | * removed from s_inodes list while we dropped the | 2161 | * removed from s_inodes list while we dropped the |
2149 | * inode_sb_list_lock. We cannot iput the inode now as we can | 2162 | * s_inode_list_lock. We cannot iput the inode now as we can |
2150 | * be holding the last reference and we cannot iput it under | 2163 | * be holding the last reference and we cannot iput it under |
2151 | * inode_sb_list_lock. So we keep the reference and iput it | 2164 | * s_inode_list_lock. So we keep the reference and iput it |
2152 | * later. | 2165 | * later. |
2153 | */ | 2166 | */ |
2154 | iput(old_inode); | 2167 | iput(old_inode); |
@@ -2158,10 +2171,11 @@ static void wait_sb_inodes(struct super_block *sb) | |||
2158 | 2171 | ||
2159 | cond_resched(); | 2172 | cond_resched(); |
2160 | 2173 | ||
2161 | spin_lock(&inode_sb_list_lock); | 2174 | spin_lock(&sb->s_inode_list_lock); |
2162 | } | 2175 | } |
2163 | spin_unlock(&inode_sb_list_lock); | 2176 | spin_unlock(&sb->s_inode_list_lock); |
2164 | iput(old_inode); | 2177 | iput(old_inode); |
2178 | mutex_unlock(&sb->s_sync_lock); | ||
2165 | } | 2179 | } |
2166 | 2180 | ||
2167 | static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, | 2181 | static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr, |