diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-25 11:39:18 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-25 11:39:18 -0500 |
commit | e07dd2ad305f6b29b47d713600aa8b722ef2a9f7 (patch) | |
tree | 4815808e538ec625bf2766b1eb9d91c7b3beaead /fs/gfs2/ops_address.c | |
parent | eba0e319c12fb098d66316a8eafbaaa9174a07c3 (diff) | |
parent | 7bc5c414fe6627ec518c82d154c796f0981f5b02 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (56 commits)
[GFS2] Allow journal recovery on read-only mount
[GFS2] Lockup on error
[GFS2] Fix page_mkwrite truncation race path
[GFS2] Fix typo
[GFS2] Fix write alloc required shortcut calculation
[GFS2] gfs2_alloc_required performance
[GFS2] Remove unneeded i_spin
[GFS2] Reduce inode size by moving i_alloc out of line
[GFS2] Fix assert in log code
[GFS2] Fix problems relating to execution of files on GFS2
[GFS2] Initialize extent_list earlier
[GFS2] Allow page migration for writeback and ordered pages
[GFS2] Remove unused variable
[GFS2] Fix log block mapper
[GFS2] Minor correction
[GFS2] Eliminate the no longer needed sd_statfs_mutex
[GFS2] Incremental patch to fix compiler warning
[GFS2] Function meta_read optimization
[GFS2] Only fetch the dinode once in block_map
[GFS2] Reorganize function gfs2_glmutex_lock
...
Diffstat (limited to 'fs/gfs2/ops_address.c')
-rw-r--r-- | fs/gfs2/ops_address.c | 649 |
1 files changed, 469 insertions, 180 deletions
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 9679f8b9870d..38dbe99a30ed 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
23 | #include <linux/backing-dev.h> | ||
24 | #include <linux/pagevec.h> | ||
23 | 25 | ||
24 | #include "gfs2.h" | 26 | #include "gfs2.h" |
25 | #include "incore.h" | 27 | #include "incore.h" |
@@ -32,7 +34,6 @@ | |||
32 | #include "quota.h" | 34 | #include "quota.h" |
33 | #include "trans.h" | 35 | #include "trans.h" |
34 | #include "rgrp.h" | 36 | #include "rgrp.h" |
35 | #include "ops_file.h" | ||
36 | #include "super.h" | 37 | #include "super.h" |
37 | #include "util.h" | 38 | #include "util.h" |
38 | #include "glops.h" | 39 | #include "glops.h" |
@@ -58,22 +59,6 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | |||
58 | } | 59 | } |
59 | 60 | ||
60 | /** | 61 | /** |
61 | * gfs2_get_block - Fills in a buffer head with details about a block | ||
62 | * @inode: The inode | ||
63 | * @lblock: The block number to look up | ||
64 | * @bh_result: The buffer head to return the result in | ||
65 | * @create: Non-zero if we may add block to the file | ||
66 | * | ||
67 | * Returns: errno | ||
68 | */ | ||
69 | |||
70 | int gfs2_get_block(struct inode *inode, sector_t lblock, | ||
71 | struct buffer_head *bh_result, int create) | ||
72 | { | ||
73 | return gfs2_block_map(inode, lblock, create, bh_result); | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * gfs2_get_block_noalloc - Fills in a buffer head with details about a block | 62 | * gfs2_get_block_noalloc - Fills in a buffer head with details about a block |
78 | * @inode: The inode | 63 | * @inode: The inode |
79 | * @lblock: The block number to look up | 64 | * @lblock: The block number to look up |
@@ -88,7 +73,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | |||
88 | { | 73 | { |
89 | int error; | 74 | int error; |
90 | 75 | ||
91 | error = gfs2_block_map(inode, lblock, 0, bh_result); | 76 | error = gfs2_block_map(inode, lblock, bh_result, 0); |
92 | if (error) | 77 | if (error) |
93 | return error; | 78 | return error; |
94 | if (!buffer_mapped(bh_result)) | 79 | if (!buffer_mapped(bh_result)) |
@@ -99,20 +84,19 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | |||
99 | static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, | 84 | static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, |
100 | struct buffer_head *bh_result, int create) | 85 | struct buffer_head *bh_result, int create) |
101 | { | 86 | { |
102 | return gfs2_block_map(inode, lblock, 0, bh_result); | 87 | return gfs2_block_map(inode, lblock, bh_result, 0); |
103 | } | 88 | } |
104 | 89 | ||
105 | /** | 90 | /** |
106 | * gfs2_writepage - Write complete page | 91 | * gfs2_writepage_common - Common bits of writepage |
107 | * @page: Page to write | 92 | * @page: The page to be written |
93 | * @wbc: The writeback control | ||
108 | * | 94 | * |
109 | * Returns: errno | 95 | * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. |
110 | * | ||
111 | * Some of this is copied from block_write_full_page() although we still | ||
112 | * call it to do most of the work. | ||
113 | */ | 96 | */ |
114 | 97 | ||
115 | static int gfs2_writepage(struct page *page, struct writeback_control *wbc) | 98 | static int gfs2_writepage_common(struct page *page, |
99 | struct writeback_control *wbc) | ||
116 | { | 100 | { |
117 | struct inode *inode = page->mapping->host; | 101 | struct inode *inode = page->mapping->host; |
118 | struct gfs2_inode *ip = GFS2_I(inode); | 102 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -120,41 +104,133 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc) | |||
120 | loff_t i_size = i_size_read(inode); | 104 | loff_t i_size = i_size_read(inode); |
121 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 105 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
122 | unsigned offset; | 106 | unsigned offset; |
123 | int error; | 107 | int ret = -EIO; |
124 | int done_trans = 0; | ||
125 | 108 | ||
126 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { | 109 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) |
127 | unlock_page(page); | 110 | goto out; |
128 | return -EIO; | 111 | ret = 0; |
129 | } | ||
130 | if (current->journal_info) | 112 | if (current->journal_info) |
131 | goto out_ignore; | 113 | goto redirty; |
132 | |||
133 | /* Is the page fully outside i_size? (truncate in progress) */ | 114 | /* Is the page fully outside i_size? (truncate in progress) */ |
134 | offset = i_size & (PAGE_CACHE_SIZE-1); | 115 | offset = i_size & (PAGE_CACHE_SIZE-1); |
135 | if (page->index > end_index || (page->index == end_index && !offset)) { | 116 | if (page->index > end_index || (page->index == end_index && !offset)) { |
136 | page->mapping->a_ops->invalidatepage(page, 0); | 117 | page->mapping->a_ops->invalidatepage(page, 0); |
137 | unlock_page(page); | 118 | goto out; |
138 | return 0; /* don't care */ | 119 | } |
120 | return 1; | ||
121 | redirty: | ||
122 | redirty_page_for_writepage(wbc, page); | ||
123 | out: | ||
124 | unlock_page(page); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * gfs2_writeback_writepage - Write page for writeback mappings | ||
130 | * @page: The page | ||
131 | * @wbc: The writeback control | ||
132 | * | ||
133 | */ | ||
134 | |||
135 | static int gfs2_writeback_writepage(struct page *page, | ||
136 | struct writeback_control *wbc) | ||
137 | { | ||
138 | int ret; | ||
139 | |||
140 | ret = gfs2_writepage_common(page, wbc); | ||
141 | if (ret <= 0) | ||
142 | return ret; | ||
143 | |||
144 | ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); | ||
145 | if (ret == -EAGAIN) | ||
146 | ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
147 | return ret; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * gfs2_ordered_writepage - Write page for ordered data files | ||
152 | * @page: The page to write | ||
153 | * @wbc: The writeback control | ||
154 | * | ||
155 | */ | ||
156 | |||
157 | static int gfs2_ordered_writepage(struct page *page, | ||
158 | struct writeback_control *wbc) | ||
159 | { | ||
160 | struct inode *inode = page->mapping->host; | ||
161 | struct gfs2_inode *ip = GFS2_I(inode); | ||
162 | int ret; | ||
163 | |||
164 | ret = gfs2_writepage_common(page, wbc); | ||
165 | if (ret <= 0) | ||
166 | return ret; | ||
167 | |||
168 | if (!page_has_buffers(page)) { | ||
169 | create_empty_buffers(page, inode->i_sb->s_blocksize, | ||
170 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
139 | } | 171 | } |
172 | gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); | ||
173 | return block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
174 | } | ||
140 | 175 | ||
141 | if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) && | 176 | /** |
142 | PageChecked(page)) { | 177 | * __gfs2_jdata_writepage - The core of jdata writepage |
178 | * @page: The page to write | ||
179 | * @wbc: The writeback control | ||
180 | * | ||
181 | * This is shared between writepage and writepages and implements the | ||
182 | * core of the writepage operation. If a transaction is required then | ||
183 | * PageChecked will have been set and the transaction will have | ||
184 | * already been started before this is called. | ||
185 | */ | ||
186 | |||
187 | static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | ||
188 | { | ||
189 | struct inode *inode = page->mapping->host; | ||
190 | struct gfs2_inode *ip = GFS2_I(inode); | ||
191 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
192 | |||
193 | if (PageChecked(page)) { | ||
143 | ClearPageChecked(page); | 194 | ClearPageChecked(page); |
144 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); | ||
145 | if (error) | ||
146 | goto out_ignore; | ||
147 | if (!page_has_buffers(page)) { | 195 | if (!page_has_buffers(page)) { |
148 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 196 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
149 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 197 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
150 | } | 198 | } |
151 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); | 199 | gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); |
200 | } | ||
201 | return block_write_full_page(page, gfs2_get_block_noalloc, wbc); | ||
202 | } | ||
203 | |||
204 | /** | ||
205 | * gfs2_jdata_writepage - Write complete page | ||
206 | * @page: Page to write | ||
207 | * | ||
208 | * Returns: errno | ||
209 | * | ||
210 | */ | ||
211 | |||
212 | static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) | ||
213 | { | ||
214 | struct inode *inode = page->mapping->host; | ||
215 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
216 | int error; | ||
217 | int done_trans = 0; | ||
218 | |||
219 | error = gfs2_writepage_common(page, wbc); | ||
220 | if (error <= 0) | ||
221 | return error; | ||
222 | |||
223 | if (PageChecked(page)) { | ||
224 | if (wbc->sync_mode != WB_SYNC_ALL) | ||
225 | goto out_ignore; | ||
226 | error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); | ||
227 | if (error) | ||
228 | goto out_ignore; | ||
152 | done_trans = 1; | 229 | done_trans = 1; |
153 | } | 230 | } |
154 | error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); | 231 | error = __gfs2_jdata_writepage(page, wbc); |
155 | if (done_trans) | 232 | if (done_trans) |
156 | gfs2_trans_end(sdp); | 233 | gfs2_trans_end(sdp); |
157 | gfs2_meta_cache_flush(ip); | ||
158 | return error; | 234 | return error; |
159 | 235 | ||
160 | out_ignore: | 236 | out_ignore: |
@@ -164,29 +240,190 @@ out_ignore: | |||
164 | } | 240 | } |
165 | 241 | ||
166 | /** | 242 | /** |
167 | * gfs2_writepages - Write a bunch of dirty pages back to disk | 243 | * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk |
168 | * @mapping: The mapping to write | 244 | * @mapping: The mapping to write |
169 | * @wbc: Write-back control | 245 | * @wbc: Write-back control |
170 | * | 246 | * |
171 | * For journaled files and/or ordered writes this just falls back to the | 247 | * For the data=writeback case we can already ignore buffer heads |
172 | * kernel's default writepages path for now. We will probably want to change | ||
173 | * that eventually (i.e. when we look at allocate on flush). | ||
174 | * | ||
175 | * For the data=writeback case though we can already ignore buffer heads | ||
176 | * and write whole extents at once. This is a big reduction in the | 248 | * and write whole extents at once. This is a big reduction in the |
177 | * number of I/O requests we send and the bmap calls we make in this case. | 249 | * number of I/O requests we send and the bmap calls we make in this case. |
178 | */ | 250 | */ |
179 | static int gfs2_writepages(struct address_space *mapping, | 251 | static int gfs2_writeback_writepages(struct address_space *mapping, |
180 | struct writeback_control *wbc) | 252 | struct writeback_control *wbc) |
253 | { | ||
254 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages | ||
259 | * @mapping: The mapping | ||
260 | * @wbc: The writeback control | ||
261 | * @pvec: The vector of pages | ||
262 | * @nr_pages: The number of pages to write | ||
263 | * @end: The last page index to write (not enforced when wbc->range_cyclic is set) | ||
264 | * | ||
265 | * Returns: non-zero if loop should terminate, zero otherwise | ||
266 | */ | ||
267 | |||
268 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, | ||
269 | struct writeback_control *wbc, | ||
270 | struct pagevec *pvec, | ||
271 | int nr_pages, pgoff_t end) | ||
181 | { | 272 | { |
182 | struct inode *inode = mapping->host; | 273 | struct inode *inode = mapping->host; |
183 | struct gfs2_inode *ip = GFS2_I(inode); | ||
184 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 274 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
275 | loff_t i_size = i_size_read(inode); | ||
276 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
277 | unsigned offset = i_size & (PAGE_CACHE_SIZE-1); | ||
278 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); | ||
279 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
280 | int i; | ||
281 | int ret; | ||
282 | |||
283 | ret = gfs2_trans_begin(sdp, nrblocks, 0); | ||
284 | if (ret < 0) | ||
285 | return ret; | ||
286 | |||
287 | for(i = 0; i < nr_pages; i++) { | ||
288 | struct page *page = pvec->pages[i]; | ||
289 | |||
290 | lock_page(page); | ||
291 | |||
292 | if (unlikely(page->mapping != mapping)) { | ||
293 | unlock_page(page); | ||
294 | continue; | ||
295 | } | ||
296 | |||
297 | if (!wbc->range_cyclic && page->index > end) { | ||
298 | ret = 1; | ||
299 | unlock_page(page); | ||
300 | continue; | ||
301 | } | ||
302 | |||
303 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
304 | wait_on_page_writeback(page); | ||
305 | |||
306 | if (PageWriteback(page) || | ||
307 | !clear_page_dirty_for_io(page)) { | ||
308 | unlock_page(page); | ||
309 | continue; | ||
310 | } | ||
311 | |||
312 | /* Is the page fully outside i_size? (truncate in progress) */ | ||
313 | if (page->index > end_index || (page->index == end_index && !offset)) { | ||
314 | page->mapping->a_ops->invalidatepage(page, 0); | ||
315 | unlock_page(page); | ||
316 | continue; | ||
317 | } | ||
318 | |||
319 | ret = __gfs2_jdata_writepage(page, wbc); | ||
320 | |||
321 | if (ret || (--(wbc->nr_to_write) <= 0)) | ||
322 | ret = 1; | ||
323 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
324 | wbc->encountered_congestion = 1; | ||
325 | ret = 1; | ||
326 | } | ||
327 | |||
328 | } | ||
329 | gfs2_trans_end(sdp); | ||
330 | return ret; | ||
331 | } | ||
332 | |||
333 | /** | ||
334 | * gfs2_write_cache_jdata - Like write_cache_pages but different | ||
335 | * @mapping: The mapping to write | ||
336 | * @wbc: The writeback control | ||
337 | * | ||
338 | * Dirty pages are looked up and written back one pagevec at a time. | ||
339 | * | ||
340 | * The reason that we use our own function here is that we need to | ||
341 | * start transactions before we grab page locks. This allows us | ||
342 | * to get the ordering right. | ||
343 | */ | ||
344 | |||
345 | static int gfs2_write_cache_jdata(struct address_space *mapping, | ||
346 | struct writeback_control *wbc) | ||
347 | { | ||
348 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
349 | int ret = 0; | ||
350 | int done = 0; | ||
351 | struct pagevec pvec; | ||
352 | int nr_pages; | ||
353 | pgoff_t index; | ||
354 | pgoff_t end; | ||
355 | int scanned = 0; | ||
356 | int range_whole = 0; | ||
357 | |||
358 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
359 | wbc->encountered_congestion = 1; | ||
360 | return 0; | ||
361 | } | ||
362 | |||
363 | pagevec_init(&pvec, 0); | ||
364 | if (wbc->range_cyclic) { | ||
365 | index = mapping->writeback_index; /* Start from prev offset */ | ||
366 | end = -1; | ||
367 | } else { | ||
368 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
369 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
370 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
371 | range_whole = 1; | ||
372 | scanned = 1; | ||
373 | } | ||
185 | 374 | ||
186 | if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) | 375 | retry: |
187 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | 376 | while (!done && (index <= end) && |
377 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
378 | PAGECACHE_TAG_DIRTY, | ||
379 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
380 | scanned = 1; | ||
381 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); | ||
382 | if (ret) | ||
383 | done = 1; | ||
384 | if (ret > 0) | ||
385 | ret = 0; | ||
386 | |||
387 | pagevec_release(&pvec); | ||
388 | cond_resched(); | ||
389 | } | ||
390 | |||
391 | if (!scanned && !done) { | ||
392 | /* | ||
393 | * We hit the last page and there is more work to be done: wrap | ||
394 | * back to the start of the file | ||
395 | */ | ||
396 | scanned = 1; | ||
397 | index = 0; | ||
398 | goto retry; | ||
399 | } | ||
400 | |||
401 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
402 | mapping->writeback_index = index; | ||
403 | return ret; | ||
404 | } | ||
405 | |||
406 | |||
407 | /** | ||
408 | * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk | ||
409 | * @mapping: The mapping to write | ||
410 | * @wbc: The writeback control | ||
411 | * | ||
412 | */ | ||
188 | 413 | ||
189 | return generic_writepages(mapping, wbc); | 414 | static int gfs2_jdata_writepages(struct address_space *mapping, |
415 | struct writeback_control *wbc) | ||
416 | { | ||
417 | struct gfs2_inode *ip = GFS2_I(mapping->host); | ||
418 | struct gfs2_sbd *sdp = GFS2_SB(mapping->host); | ||
419 | int ret; | ||
420 | |||
421 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
422 | if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { | ||
423 | gfs2_log_flush(sdp, ip->i_gl); | ||
424 | ret = gfs2_write_cache_jdata(mapping, wbc); | ||
425 | } | ||
426 | return ret; | ||
190 | } | 427 | } |
191 | 428 | ||
192 | /** | 429 | /** |
@@ -231,62 +468,107 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) | |||
231 | 468 | ||
232 | 469 | ||
233 | /** | 470 | /** |
234 | * gfs2_readpage - readpage with locking | 471 | * __gfs2_readpage - readpage |
235 | * @file: The file to read a page for. N.B. This may be NULL if we are | 472 | * @file: The file to read a page for |
236 | * reading an internal file. | ||
237 | * @page: The page to read | 473 | * @page: The page to read |
238 | * | 474 | * |
239 | * Returns: errno | 475 | * This is the core of gfs2's readpage. It's used by the internal file |
476 | * reading code as in that case we already hold the glock. It's also | ||
477 | * called by gfs2_readpage() once the required lock has been granted. | ||
478 | * | ||
240 | */ | 479 | */ |
241 | 480 | ||
242 | static int gfs2_readpage(struct file *file, struct page *page) | 481 | static int __gfs2_readpage(void *file, struct page *page) |
243 | { | 482 | { |
244 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 483 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); |
245 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | 484 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); |
246 | struct gfs2_file *gf = NULL; | ||
247 | struct gfs2_holder gh; | ||
248 | int error; | 485 | int error; |
249 | int do_unlock = 0; | ||
250 | |||
251 | if (likely(file != &gfs2_internal_file_sentinel)) { | ||
252 | if (file) { | ||
253 | gf = file->private_data; | ||
254 | if (test_bit(GFF_EXLOCK, &gf->f_flags)) | ||
255 | /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ | ||
256 | goto skip_lock; | ||
257 | } | ||
258 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); | ||
259 | do_unlock = 1; | ||
260 | error = gfs2_glock_nq_atime(&gh); | ||
261 | if (unlikely(error)) | ||
262 | goto out_unlock; | ||
263 | } | ||
264 | 486 | ||
265 | skip_lock: | ||
266 | if (gfs2_is_stuffed(ip)) { | 487 | if (gfs2_is_stuffed(ip)) { |
267 | error = stuffed_readpage(ip, page); | 488 | error = stuffed_readpage(ip, page); |
268 | unlock_page(page); | 489 | unlock_page(page); |
269 | } else | 490 | } else { |
270 | error = mpage_readpage(page, gfs2_get_block); | 491 | error = mpage_readpage(page, gfs2_block_map); |
492 | } | ||
271 | 493 | ||
272 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 494 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
273 | error = -EIO; | 495 | return -EIO; |
496 | |||
497 | return error; | ||
498 | } | ||
499 | |||
500 | /** | ||
501 | * gfs2_readpage - read a page of a file | ||
502 | * @file: The file to read | ||
503 | * @page: The page of the file | ||
504 | * | ||
505 | * This deals with the locking required. We use a trylock in order to | ||
506 | * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE | ||
507 | * in the event that we are unable to get the lock. | ||
508 | */ | ||
509 | |||
510 | static int gfs2_readpage(struct file *file, struct page *page) | ||
511 | { | ||
512 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
513 | struct gfs2_holder gh; | ||
514 | int error; | ||
274 | 515 | ||
275 | if (do_unlock) { | 516 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); |
276 | gfs2_glock_dq_m(1, &gh); | 517 | error = gfs2_glock_nq_atime(&gh); |
277 | gfs2_holder_uninit(&gh); | 518 | if (unlikely(error)) { |
519 | unlock_page(page); | ||
520 | goto out; | ||
278 | } | 521 | } |
522 | error = __gfs2_readpage(file, page); | ||
523 | gfs2_glock_dq(&gh); | ||
279 | out: | 524 | out: |
280 | return error; | 525 | gfs2_holder_uninit(&gh); |
281 | out_unlock: | ||
282 | unlock_page(page); | ||
283 | if (error == GLR_TRYFAILED) { | 526 | if (error == GLR_TRYFAILED) { |
284 | error = AOP_TRUNCATED_PAGE; | ||
285 | yield(); | 527 | yield(); |
528 | return AOP_TRUNCATED_PAGE; | ||
286 | } | 529 | } |
287 | if (do_unlock) | 530 | return error; |
288 | gfs2_holder_uninit(&gh); | 531 | } |
289 | goto out; | 532 | |
533 | /** | ||
534 | * gfs2_internal_read - read an internal file | ||
535 | * @ip: The gfs2 inode | ||
536 | * @ra_state: The readahead state (or NULL for no readahead) | ||
537 | * @buf: The buffer to fill | ||
538 | * @pos: The file position | ||
539 | * @size: The amount to read | ||
540 | * | ||
541 | */ | ||
542 | |||
543 | int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, | ||
544 | char *buf, loff_t *pos, unsigned size) | ||
545 | { | ||
546 | struct address_space *mapping = ip->i_inode.i_mapping; | ||
547 | unsigned long index = *pos / PAGE_CACHE_SIZE; | ||
548 | unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); | ||
549 | unsigned copied = 0; | ||
550 | unsigned amt; | ||
551 | struct page *page; | ||
552 | void *p; | ||
553 | |||
554 | do { | ||
555 | amt = size - copied; | ||
556 | if (offset + size > PAGE_CACHE_SIZE) | ||
557 | amt = PAGE_CACHE_SIZE - offset; | ||
558 | page = read_cache_page(mapping, index, __gfs2_readpage, NULL); | ||
559 | if (IS_ERR(page)) | ||
560 | return PTR_ERR(page); | ||
561 | p = kmap_atomic(page, KM_USER0); | ||
562 | memcpy(buf + copied, p + offset, amt); | ||
563 | kunmap_atomic(p, KM_USER0); | ||
564 | mark_page_accessed(page); | ||
565 | page_cache_release(page); | ||
566 | copied += amt; | ||
567 | index++; | ||
568 | offset = 0; | ||
569 | } while(copied < size); | ||
570 | (*pos) += size; | ||
571 | return size; | ||
290 | } | 572 | } |
291 | 573 | ||
292 | /** | 574 | /** |
@@ -300,10 +582,9 @@ out_unlock: | |||
300 | * Any I/O we ignore at this time will be done via readpage later. | 582 | * Any I/O we ignore at this time will be done via readpage later. |
301 | * 2. We don't handle stuffed files here we let readpage do the honours. | 583 | * 2. We don't handle stuffed files here we let readpage do the honours. |
302 | * 3. mpage_readpages() does most of the heavy lifting in the common case. | 584 | * 3. mpage_readpages() does most of the heavy lifting in the common case. |
303 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. | 585 | * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. |
304 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as | ||
305 | * well as read-ahead. | ||
306 | */ | 586 | */ |
587 | |||
307 | static int gfs2_readpages(struct file *file, struct address_space *mapping, | 588 | static int gfs2_readpages(struct file *file, struct address_space *mapping, |
308 | struct list_head *pages, unsigned nr_pages) | 589 | struct list_head *pages, unsigned nr_pages) |
309 | { | 590 | { |
@@ -311,42 +592,20 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
311 | struct gfs2_inode *ip = GFS2_I(inode); | 592 | struct gfs2_inode *ip = GFS2_I(inode); |
312 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 593 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
313 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
314 | int ret = 0; | 595 | int ret; |
315 | int do_unlock = 0; | ||
316 | 596 | ||
317 | if (likely(file != &gfs2_internal_file_sentinel)) { | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); |
318 | if (file) { | 598 | ret = gfs2_glock_nq_atime(&gh); |
319 | struct gfs2_file *gf = file->private_data; | 599 | if (unlikely(ret)) |
320 | if (test_bit(GFF_EXLOCK, &gf->f_flags)) | 600 | goto out_uninit; |
321 | goto skip_lock; | ||
322 | } | ||
323 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, | ||
324 | LM_FLAG_TRY_1CB|GL_ATIME, &gh); | ||
325 | do_unlock = 1; | ||
326 | ret = gfs2_glock_nq_atime(&gh); | ||
327 | if (ret == GLR_TRYFAILED) | ||
328 | goto out_noerror; | ||
329 | if (unlikely(ret)) | ||
330 | goto out_unlock; | ||
331 | } | ||
332 | skip_lock: | ||
333 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
334 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); | 602 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); |
335 | 603 | gfs2_glock_dq(&gh); | |
336 | if (do_unlock) { | 604 | out_uninit: |
337 | gfs2_glock_dq_m(1, &gh); | 605 | gfs2_holder_uninit(&gh); |
338 | gfs2_holder_uninit(&gh); | ||
339 | } | ||
340 | out: | ||
341 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 606 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
342 | ret = -EIO; | 607 | ret = -EIO; |
343 | return ret; | 608 | return ret; |
344 | out_noerror: | ||
345 | ret = 0; | ||
346 | out_unlock: | ||
347 | if (do_unlock) | ||
348 | gfs2_holder_uninit(&gh); | ||
349 | goto out; | ||
350 | } | 609 | } |
351 | 610 | ||
352 | /** | 611 | /** |
@@ -382,20 +641,11 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
382 | if (unlikely(error)) | 641 | if (unlikely(error)) |
383 | goto out_uninit; | 642 | goto out_uninit; |
384 | 643 | ||
385 | error = -ENOMEM; | ||
386 | page = __grab_cache_page(mapping, index); | ||
387 | *pagep = page; | ||
388 | if (!page) | ||
389 | goto out_unlock; | ||
390 | |||
391 | gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); | 644 | gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); |
392 | |||
393 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); | 645 | error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); |
394 | if (error) | 646 | if (error) |
395 | goto out_putpage; | 647 | goto out_unlock; |
396 | |||
397 | 648 | ||
398 | ip->i_alloc.al_requested = 0; | ||
399 | if (alloc_required) { | 649 | if (alloc_required) { |
400 | al = gfs2_alloc_get(ip); | 650 | al = gfs2_alloc_get(ip); |
401 | 651 | ||
@@ -424,40 +674,47 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
424 | if (error) | 674 | if (error) |
425 | goto out_trans_fail; | 675 | goto out_trans_fail; |
426 | 676 | ||
677 | error = -ENOMEM; | ||
678 | page = __grab_cache_page(mapping, index); | ||
679 | *pagep = page; | ||
680 | if (unlikely(!page)) | ||
681 | goto out_endtrans; | ||
682 | |||
427 | if (gfs2_is_stuffed(ip)) { | 683 | if (gfs2_is_stuffed(ip)) { |
684 | error = 0; | ||
428 | if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | 685 | if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { |
429 | error = gfs2_unstuff_dinode(ip, page); | 686 | error = gfs2_unstuff_dinode(ip, page); |
430 | if (error == 0) | 687 | if (error == 0) |
431 | goto prepare_write; | 688 | goto prepare_write; |
432 | } else if (!PageUptodate(page)) | 689 | } else if (!PageUptodate(page)) { |
433 | error = stuffed_readpage(ip, page); | 690 | error = stuffed_readpage(ip, page); |
691 | } | ||
434 | goto out; | 692 | goto out; |
435 | } | 693 | } |
436 | 694 | ||
437 | prepare_write: | 695 | prepare_write: |
438 | error = block_prepare_write(page, from, to, gfs2_get_block); | 696 | error = block_prepare_write(page, from, to, gfs2_block_map); |
439 | |||
440 | out: | 697 | out: |
441 | if (error) { | 698 | if (error == 0) |
442 | gfs2_trans_end(sdp); | 699 | return 0; |
700 | |||
701 | page_cache_release(page); | ||
702 | if (pos + len > ip->i_inode.i_size) | ||
703 | vmtruncate(&ip->i_inode, ip->i_inode.i_size); | ||
704 | out_endtrans: | ||
705 | gfs2_trans_end(sdp); | ||
443 | out_trans_fail: | 706 | out_trans_fail: |
444 | if (alloc_required) { | 707 | if (alloc_required) { |
445 | gfs2_inplace_release(ip); | 708 | gfs2_inplace_release(ip); |
446 | out_qunlock: | 709 | out_qunlock: |
447 | gfs2_quota_unlock(ip); | 710 | gfs2_quota_unlock(ip); |
448 | out_alloc_put: | 711 | out_alloc_put: |
449 | gfs2_alloc_put(ip); | 712 | gfs2_alloc_put(ip); |
450 | } | 713 | } |
451 | out_putpage: | ||
452 | page_cache_release(page); | ||
453 | if (pos + len > ip->i_inode.i_size) | ||
454 | vmtruncate(&ip->i_inode, ip->i_inode.i_size); | ||
455 | out_unlock: | 714 | out_unlock: |
456 | gfs2_glock_dq_m(1, &ip->i_gh); | 715 | gfs2_glock_dq(&ip->i_gh); |
457 | out_uninit: | 716 | out_uninit: |
458 | gfs2_holder_uninit(&ip->i_gh); | 717 | gfs2_holder_uninit(&ip->i_gh); |
459 | } | ||
460 | |||
461 | return error; | 718 | return error; |
462 | } | 719 | } |
463 | 720 | ||
@@ -565,7 +822,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
565 | struct gfs2_inode *ip = GFS2_I(inode); | 822 | struct gfs2_inode *ip = GFS2_I(inode); |
566 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 823 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
567 | struct buffer_head *dibh; | 824 | struct buffer_head *dibh; |
568 | struct gfs2_alloc *al = &ip->i_alloc; | 825 | struct gfs2_alloc *al = ip->i_alloc; |
569 | struct gfs2_dinode *di; | 826 | struct gfs2_dinode *di; |
570 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); | 827 | unsigned int from = pos & (PAGE_CACHE_SIZE - 1); |
571 | unsigned int to = from + len; | 828 | unsigned int to = from + len; |
@@ -585,19 +842,16 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
585 | if (gfs2_is_stuffed(ip)) | 842 | if (gfs2_is_stuffed(ip)) |
586 | return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); | 843 | return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); |
587 | 844 | ||
588 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | 845 | if (!gfs2_is_writeback(ip)) |
589 | gfs2_page_add_databufs(ip, page, from, to); | 846 | gfs2_page_add_databufs(ip, page, from, to); |
590 | 847 | ||
591 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 848 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
592 | 849 | ||
593 | if (likely(ret >= 0)) { | 850 | if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) { |
594 | copied = ret; | 851 | di = (struct gfs2_dinode *)dibh->b_data; |
595 | if ((pos + copied) > inode->i_size) { | 852 | ip->i_di.di_size = inode->i_size; |
596 | di = (struct gfs2_dinode *)dibh->b_data; | 853 | di->di_size = cpu_to_be64(inode->i_size); |
597 | ip->i_di.di_size = inode->i_size; | 854 | mark_inode_dirty(inode); |
598 | di->di_size = cpu_to_be64(inode->i_size); | ||
599 | mark_inode_dirty(inode); | ||
600 | } | ||
601 | } | 855 | } |
602 | 856 | ||
603 | if (inode == sdp->sd_rindex) | 857 | if (inode == sdp->sd_rindex) |
@@ -606,7 +860,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
606 | brelse(dibh); | 860 | brelse(dibh); |
607 | gfs2_trans_end(sdp); | 861 | gfs2_trans_end(sdp); |
608 | failed: | 862 | failed: |
609 | if (al->al_requested) { | 863 | if (al) { |
610 | gfs2_inplace_release(ip); | 864 | gfs2_inplace_release(ip); |
611 | gfs2_quota_unlock(ip); | 865 | gfs2_quota_unlock(ip); |
612 | gfs2_alloc_put(ip); | 866 | gfs2_alloc_put(ip); |
@@ -625,11 +879,7 @@ failed: | |||
625 | 879 | ||
626 | static int gfs2_set_page_dirty(struct page *page) | 880 | static int gfs2_set_page_dirty(struct page *page) |
627 | { | 881 | { |
628 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 882 | SetPageChecked(page); |
629 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | ||
630 | |||
631 | if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) | ||
632 | SetPageChecked(page); | ||
633 | return __set_page_dirty_buffers(page); | 883 | return __set_page_dirty_buffers(page); |
634 | } | 884 | } |
635 | 885 | ||
@@ -653,7 +903,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) | |||
653 | return 0; | 903 | return 0; |
654 | 904 | ||
655 | if (!gfs2_is_stuffed(ip)) | 905 | if (!gfs2_is_stuffed(ip)) |
656 | dblock = generic_block_bmap(mapping, lblock, gfs2_get_block); | 906 | dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); |
657 | 907 | ||
658 | gfs2_glock_dq_uninit(&i_gh); | 908 | gfs2_glock_dq_uninit(&i_gh); |
659 | 909 | ||
@@ -719,13 +969,9 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
719 | { | 969 | { |
720 | /* | 970 | /* |
721 | * Should we return an error here? I can't see that O_DIRECT for | 971 | * Should we return an error here? I can't see that O_DIRECT for |
722 | * a journaled file makes any sense. For now we'll silently fall | 972 | * a stuffed file makes any sense. For now we'll silently fall |
723 | * back to buffered I/O, likewise we do the same for stuffed | 973 | * back to buffered I/O |
724 | * files since they are (a) small and (b) unaligned. | ||
725 | */ | 974 | */ |
726 | if (gfs2_is_jdata(ip)) | ||
727 | return 0; | ||
728 | |||
729 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
730 | return 0; | 976 | return 0; |
731 | 977 | ||
@@ -836,9 +1082,23 @@ cannot_release: | |||
836 | return 0; | 1082 | return 0; |
837 | } | 1083 | } |
838 | 1084 | ||
839 | const struct address_space_operations gfs2_file_aops = { | 1085 | static const struct address_space_operations gfs2_writeback_aops = { |
840 | .writepage = gfs2_writepage, | 1086 | .writepage = gfs2_writeback_writepage, |
841 | .writepages = gfs2_writepages, | 1087 | .writepages = gfs2_writeback_writepages, |
1088 | .readpage = gfs2_readpage, | ||
1089 | .readpages = gfs2_readpages, | ||
1090 | .sync_page = block_sync_page, | ||
1091 | .write_begin = gfs2_write_begin, | ||
1092 | .write_end = gfs2_write_end, | ||
1093 | .bmap = gfs2_bmap, | ||
1094 | .invalidatepage = gfs2_invalidatepage, | ||
1095 | .releasepage = gfs2_releasepage, | ||
1096 | .direct_IO = gfs2_direct_IO, | ||
1097 | .migratepage = buffer_migrate_page, | ||
1098 | }; | ||
1099 | |||
1100 | static const struct address_space_operations gfs2_ordered_aops = { | ||
1101 | .writepage = gfs2_ordered_writepage, | ||
842 | .readpage = gfs2_readpage, | 1102 | .readpage = gfs2_readpage, |
843 | .readpages = gfs2_readpages, | 1103 | .readpages = gfs2_readpages, |
844 | .sync_page = block_sync_page, | 1104 | .sync_page = block_sync_page, |
@@ -849,5 +1109,34 @@ const struct address_space_operations gfs2_file_aops = { | |||
849 | .invalidatepage = gfs2_invalidatepage, | 1109 | .invalidatepage = gfs2_invalidatepage, |
850 | .releasepage = gfs2_releasepage, | 1110 | .releasepage = gfs2_releasepage, |
851 | .direct_IO = gfs2_direct_IO, | 1111 | .direct_IO = gfs2_direct_IO, |
1112 | .migratepage = buffer_migrate_page, | ||
852 | }; | 1113 | }; |
853 | 1114 | ||
1115 | static const struct address_space_operations gfs2_jdata_aops = { | ||
1116 | .writepage = gfs2_jdata_writepage, | ||
1117 | .writepages = gfs2_jdata_writepages, | ||
1118 | .readpage = gfs2_readpage, | ||
1119 | .readpages = gfs2_readpages, | ||
1120 | .sync_page = block_sync_page, | ||
1121 | .write_begin = gfs2_write_begin, | ||
1122 | .write_end = gfs2_write_end, | ||
1123 | .set_page_dirty = gfs2_set_page_dirty, | ||
1124 | .bmap = gfs2_bmap, | ||
1125 | .invalidatepage = gfs2_invalidatepage, | ||
1126 | .releasepage = gfs2_releasepage, | ||
1127 | }; | ||
1128 | |||
1129 | void gfs2_set_aops(struct inode *inode) | ||
1130 | { | ||
1131 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1132 | |||
1133 | if (gfs2_is_writeback(ip)) | ||
1134 | inode->i_mapping->a_ops = &gfs2_writeback_aops; | ||
1135 | else if (gfs2_is_ordered(ip)) | ||
1136 | inode->i_mapping->a_ops = &gfs2_ordered_aops; | ||
1137 | else if (gfs2_is_jdata(ip)) | ||
1138 | inode->i_mapping->a_ops = &gfs2_jdata_aops; | ||
1139 | else | ||
1140 | BUG(); | ||
1141 | } | ||
1142 | |||