diff options
Diffstat (limited to 'fs/gfs2/ops_address.c')
-rw-r--r-- | fs/gfs2/ops_address.c | 134 |
1 files changed, 79 insertions, 55 deletions
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d8d69a72a10d..56e33590b656 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/pagevec.h> | 16 | #include <linux/pagevec.h> |
17 | #include <linux/mpage.h> | 17 | #include <linux/mpage.h> |
18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
19 | #include <linux/writeback.h> | ||
19 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/lm_interface.h> | 21 | #include <linux/lm_interface.h> |
21 | 22 | ||
@@ -157,6 +158,32 @@ out_ignore: | |||
157 | } | 158 | } |
158 | 159 | ||
159 | /** | 160 | /** |
161 | * gfs2_writepages - Write a bunch of dirty pages back to disk | ||
162 | * @mapping: The mapping to write | ||
163 | * @wbc: Write-back control | ||
164 | * | ||
165 | * For journaled files and/or ordered writes this just falls back to the | ||
166 | * kernel's default writepages path for now. We will probably want to change | ||
167 | * that eventually (i.e. when we look at allocate on flush). | ||
168 | * | ||
169 | * For the data=writeback case though we can already ignore buffer heads | ||
170 | * and write whole extents at once. This is a big reduction in the | ||
171 | * number of I/O requests we send and the bmap calls we make in this case. | ||
172 | */ | ||
173 | static int gfs2_writepages(struct address_space *mapping, | ||
174 | struct writeback_control *wbc) | ||
175 | { | ||
176 | struct inode *inode = mapping->host; | ||
177 | struct gfs2_inode *ip = GFS2_I(inode); | ||
178 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
179 | |||
180 | if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) | ||
181 | return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); | ||
182 | |||
183 | return generic_writepages(mapping, wbc); | ||
184 | } | ||
185 | |||
186 | /** | ||
160 | * stuffed_readpage - Fill in a Linux page with stuffed file data | 187 | * stuffed_readpage - Fill in a Linux page with stuffed file data |
161 | * @ip: the inode | 188 | * @ip: the inode |
162 | * @page: the page | 189 | * @page: the page |
@@ -256,7 +283,7 @@ out_unlock: | |||
256 | * the page lock and the glock) and return having done no I/O. It's | 283 | * the page lock and the glock) and return having done no I/O. It's |
257 | * obviously not something we'd want to do on too regular a basis. | 284 | * obviously not something we'd want to do on too regular a basis. |
258 | * Any I/O we ignore at this time will be done via readpage later. | 285 | * Any I/O we ignore at this time will be done via readpage later. |
259 | * 2. We have to handle stuffed files here too. | 286 | * 2. We don't handle stuffed files here; we let readpage do the honours. |
260 | * 3. mpage_readpages() does most of the heavy lifting in the common case. | 287 | * 3. mpage_readpages() does most of the heavy lifting in the common case. |
261 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. | 288 | * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. |
262 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as | 289 | * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as |
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
269 | struct gfs2_inode *ip = GFS2_I(inode); | 296 | struct gfs2_inode *ip = GFS2_I(inode); |
270 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 297 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
271 | struct gfs2_holder gh; | 298 | struct gfs2_holder gh; |
272 | unsigned page_idx; | 299 | int ret = 0; |
273 | int ret; | ||
274 | int do_unlock = 0; | 300 | int do_unlock = 0; |
275 | 301 | ||
276 | if (likely(file != &gfs2_internal_file_sentinel)) { | 302 | if (likely(file != &gfs2_internal_file_sentinel)) { |
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
289 | goto out_unlock; | 315 | goto out_unlock; |
290 | } | 316 | } |
291 | skip_lock: | 317 | skip_lock: |
292 | if (gfs2_is_stuffed(ip)) { | 318 | if (!gfs2_is_stuffed(ip)) |
293 | struct pagevec lru_pvec; | ||
294 | pagevec_init(&lru_pvec, 0); | ||
295 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | ||
296 | struct page *page = list_entry(pages->prev, struct page, lru); | ||
297 | prefetchw(&page->flags); | ||
298 | list_del(&page->lru); | ||
299 | if (!add_to_page_cache(page, mapping, | ||
300 | page->index, GFP_KERNEL)) { | ||
301 | ret = stuffed_readpage(ip, page); | ||
302 | unlock_page(page); | ||
303 | if (!pagevec_add(&lru_pvec, page)) | ||
304 | __pagevec_lru_add(&lru_pvec); | ||
305 | } else { | ||
306 | page_cache_release(page); | ||
307 | } | ||
308 | } | ||
309 | pagevec_lru_add(&lru_pvec); | ||
310 | ret = 0; | ||
311 | } else { | ||
312 | /* What we really want to do .... */ | ||
313 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); | 319 | ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); |
314 | } | ||
315 | 320 | ||
316 | if (do_unlock) { | 321 | if (do_unlock) { |
317 | gfs2_glock_dq_m(1, &gh); | 322 | gfs2_glock_dq_m(1, &gh); |
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page, | |||
356 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); | 361 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); |
357 | error = gfs2_glock_nq_atime(&ip->i_gh); | 362 | error = gfs2_glock_nq_atime(&ip->i_gh); |
358 | if (unlikely(error)) { | 363 | if (unlikely(error)) { |
359 | if (error == GLR_TRYFAILED) | 364 | if (error == GLR_TRYFAILED) { |
365 | unlock_page(page); | ||
360 | error = AOP_TRUNCATED_PAGE; | 366 | error = AOP_TRUNCATED_PAGE; |
367 | } | ||
361 | goto out_uninit; | 368 | goto out_uninit; |
362 | } | 369 | } |
363 | 370 | ||
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset) | |||
594 | return; | 601 | return; |
595 | } | 602 | } |
596 | 603 | ||
604 | /** | ||
605 | * gfs2_ok_for_dio - check that dio is valid on this file | ||
606 | * @ip: The inode | ||
607 | * @rw: READ or WRITE | ||
608 | * @offset: The offset at which we are reading or writing | ||
609 | * | ||
610 | * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) | ||
611 | * 1 (to accept the i/o request) | ||
612 | */ | ||
613 | static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | ||
614 | { | ||
615 | /* | ||
616 | * Should we return an error here? I can't see that O_DIRECT for | ||
617 | * a journaled file makes any sense. For now we'll silently fall | ||
618 | * back to buffered I/O, likewise we do the same for stuffed | ||
619 | * files since they are (a) small and (b) unaligned. | ||
620 | */ | ||
621 | if (gfs2_is_jdata(ip)) | ||
622 | return 0; | ||
623 | |||
624 | if (gfs2_is_stuffed(ip)) | ||
625 | return 0; | ||
626 | |||
627 | if (offset > i_size_read(&ip->i_inode)) | ||
628 | return 0; | ||
629 | return 1; | ||
630 | } | ||
631 | |||
632 | |||
633 | |||
597 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | 634 | static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, |
598 | const struct iovec *iov, loff_t offset, | 635 | const struct iovec *iov, loff_t offset, |
599 | unsigned long nr_segs) | 636 | unsigned long nr_segs) |
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
604 | struct gfs2_holder gh; | 641 | struct gfs2_holder gh; |
605 | int rv; | 642 | int rv; |
606 | 643 | ||
607 | if (rw == READ) | ||
608 | mutex_lock(&inode->i_mutex); | ||
609 | /* | 644 | /* |
610 | * Shared lock, even if it's a write, since we do no allocation | 645 | * Deferred lock, even if it's a write, since we do no allocation |
611 | * on this path. All we need change is atime. | 646 | * on this path. All we need change is atime, and this lock mode |
647 | * ensures that other nodes have flushed their buffered read caches | ||
648 | * (i.e. their page cache entries for this inode). We do not, | ||
649 | * unfortunately, have the option of only flushing a range like | ||
650 | * the VFS does. | ||
612 | */ | 651 | */ |
613 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 652 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); |
614 | rv = gfs2_glock_nq_atime(&gh); | 653 | rv = gfs2_glock_nq_atime(&gh); |
615 | if (rv) | 654 | if (rv) |
616 | goto out; | 655 | return rv; |
617 | 656 | rv = gfs2_ok_for_dio(ip, rw, offset); | |
618 | if (offset > i_size_read(inode)) | 657 | if (rv != 1) |
619 | goto out; | 658 | goto out; /* dio not valid, fall back to buffered i/o */ |
620 | 659 | ||
621 | /* | 660 | rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, |
622 | * Should we return an error here? I can't see that O_DIRECT for | 661 | iov, offset, nr_segs, |
623 | * a journaled file makes any sense. For now we'll silently fall | 662 | gfs2_get_block_direct, NULL); |
624 | * back to buffered I/O, likewise we do the same for stuffed | ||
625 | * files since they are (a) small and (b) unaligned. | ||
626 | */ | ||
627 | if (gfs2_is_jdata(ip)) | ||
628 | goto out; | ||
629 | |||
630 | if (gfs2_is_stuffed(ip)) | ||
631 | goto out; | ||
632 | |||
633 | rv = blockdev_direct_IO_own_locking(rw, iocb, inode, | ||
634 | inode->i_sb->s_bdev, | ||
635 | iov, offset, nr_segs, | ||
636 | gfs2_get_block_direct, NULL); | ||
637 | out: | 663 | out: |
638 | gfs2_glock_dq_m(1, &gh); | 664 | gfs2_glock_dq_m(1, &gh); |
639 | gfs2_holder_uninit(&gh); | 665 | gfs2_holder_uninit(&gh); |
640 | if (rw == READ) | ||
641 | mutex_unlock(&inode->i_mutex); | ||
642 | |||
643 | return rv; | 666 | return rv; |
644 | } | 667 | } |
645 | 668 | ||
@@ -763,6 +786,7 @@ out: | |||
763 | 786 | ||
764 | const struct address_space_operations gfs2_file_aops = { | 787 | const struct address_space_operations gfs2_file_aops = { |
765 | .writepage = gfs2_writepage, | 788 | .writepage = gfs2_writepage, |
789 | .writepages = gfs2_writepages, | ||
766 | .readpage = gfs2_readpage, | 790 | .readpage = gfs2_readpage, |
767 | .readpages = gfs2_readpages, | 791 | .readpages = gfs2_readpages, |
768 | .sync_page = block_sync_page, | 792 | .sync_page = block_sync_page, |