aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2006-03-24 06:18:04 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-24 10:33:25 -0500
commitebcf28e1c7a295f3321249dd235ad2e45938fdd9 (patch)
treefdd2e131e627af55d3741a7fafad0edaa61410c1 /mm
parent469eb4d03878b676418f853011ebfb54ccf83a5e (diff)
[PATCH] fadvise(): write commands
Add two new Linux-specific fadvise() extensions: LINUX_FADV_ASYNC_WRITE: start async writeout of any dirty pages between file offsets `offset' and `offset+len'. Any pages which are currently under writeout are skipped, whether or not they are dirty. LINUX_FADV_WRITE_WAIT: wait upon writeout of any dirty pages between file offsets `offset' and `offset+len'. By combining these two operations the application may do several things: LINUX_FADV_ASYNC_WRITE: push some or all of the dirty pages at the disk. LINUX_FADV_WRITE_WAIT, LINUX_FADV_ASYNC_WRITE: push all of the currently dirty pages at the disk. LINUX_FADV_WRITE_WAIT, LINUX_FADV_ASYNC_WRITE, LINUX_FADV_WRITE_WAIT: push all of the currently dirty pages at the disk, wait until they have been written. It should be noted that none of these operations write out the file's metadata. So unless the application is strictly performing overwrites of already-instantiated disk blocks, there are no guarantees here that the data will be available after a crash. To complete this suite of operations I guess we should have a "sync file metadata only" operation. This gives applications access to all the building blocks needed for all sorts of sync operations. But sync-metadata doesn't fit well with the fadvise() interface. Probably it should be a new syscall: sys_fmetadatasync(). The patch also diddles with the meaning of `endbyte' in sys_fadvise64_64(). It is made to represent the last affected byte in the file (ie: it is inclusive). Generally, all these byterange and pagerange functions are inclusive so we can easily represent EOF with -1. As Ulrich notes, these two functions are somewhat abusive of the fadvise() concept, which appears to be "set the future policy for this fd". But these commands are a perfect fit with the fadvise() implementation, and several of the existing fadvise() commands are synchronous and don't affect future policy either. I think we can live with the slight incongruity. 
Cc: Michael Kerrisk <mtk-manpages@gmx.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/fadvise.c46
-rw-r--r--mm/filemap.c10
2 files changed, 46 insertions, 10 deletions
diff --git a/mm/fadvise.c b/mm/fadvise.c
index d257c89e7704..907c39257ca0 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -15,6 +15,7 @@
15#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
16#include <linux/pagevec.h> 16#include <linux/pagevec.h>
17#include <linux/fadvise.h> 17#include <linux/fadvise.h>
18#include <linux/writeback.h>
18#include <linux/syscalls.h> 19#include <linux/syscalls.h>
19 20
20#include <asm/unistd.h> 21#include <asm/unistd.h>
@@ -22,13 +23,36 @@
22/* 23/*
23 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could 24 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
24 * deactivate the pages and clear PG_Referenced. 25 * deactivate the pages and clear PG_Referenced.
26 *
27 * LINUX_FADV_ASYNC_WRITE: start async writeout of any dirty pages between file
28 * offsets `offset' and `offset+len' inclusive. Any pages which are currently
29 * under writeout are skipped, whether or not they are dirty.
30 *
31 * LINUX_FADV_WRITE_WAIT: wait upon writeout of any dirty pages between file
32 * offsets `offset' and `offset+len'.
33 *
34 * By combining these two operations the application may do several things:
35 *
36 * LINUX_FADV_ASYNC_WRITE: push some or all of the dirty pages at the disk.
37 *
38 * LINUX_FADV_WRITE_WAIT, LINUX_FADV_ASYNC_WRITE: push all of the currently
39 * dirty pages at the disk.
40 *
41 * LINUX_FADV_WRITE_WAIT, LINUX_FADV_ASYNC_WRITE, LINUX_FADV_WRITE_WAIT: push
42 * all of the currently dirty pages at the disk, wait until they have been
43 * written.
44 *
45 * It should be noted that none of these operations write out the file's
46 * metadata. So unless the application is strictly performing overwrites of
47 * already-instantiated disk blocks, there are no guarantees here that the data
48 * will be available after a crash.
25 */ 49 */
26asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) 50asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
27{ 51{
28 struct file *file = fget(fd); 52 struct file *file = fget(fd);
29 struct address_space *mapping; 53 struct address_space *mapping;
30 struct backing_dev_info *bdi; 54 struct backing_dev_info *bdi;
31 loff_t endbyte; 55 loff_t endbyte; /* inclusive */
32 pgoff_t start_index; 56 pgoff_t start_index;
33 pgoff_t end_index; 57 pgoff_t end_index;
34 unsigned long nrpages; 58 unsigned long nrpages;
@@ -56,6 +80,8 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
56 endbyte = offset + len; 80 endbyte = offset + len;
57 if (!len || endbyte < len) 81 if (!len || endbyte < len)
58 endbyte = -1; 82 endbyte = -1;
83 else
84 endbyte--; /* inclusive */
59 85
60 bdi = mapping->backing_dev_info; 86 bdi = mapping->backing_dev_info;
61 87
@@ -78,7 +104,7 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
78 104
79 /* First and last PARTIAL page! */ 105 /* First and last PARTIAL page! */
80 start_index = offset >> PAGE_CACHE_SHIFT; 106 start_index = offset >> PAGE_CACHE_SHIFT;
81 end_index = (endbyte-1) >> PAGE_CACHE_SHIFT; 107 end_index = endbyte >> PAGE_CACHE_SHIFT;
82 108
83 /* Careful about overflow on the "+1" */ 109 /* Careful about overflow on the "+1" */
84 nrpages = end_index - start_index + 1; 110 nrpages = end_index - start_index + 1;
@@ -96,11 +122,21 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
96 filemap_flush(mapping); 122 filemap_flush(mapping);
97 123
98 /* First and last FULL page! */ 124 /* First and last FULL page! */
99 start_index = (offset + (PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT; 125 start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
100 end_index = (endbyte >> PAGE_CACHE_SHIFT); 126 end_index = (endbyte >> PAGE_CACHE_SHIFT);
101 127
102 if (end_index > start_index) 128 if (end_index >= start_index)
103 invalidate_mapping_pages(mapping, start_index, end_index-1); 129 invalidate_mapping_pages(mapping, start_index,
130 end_index);
131 break;
132 case LINUX_FADV_ASYNC_WRITE:
133 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
134 WB_SYNC_NONE);
135 break;
136 case LINUX_FADV_WRITE_WAIT:
137 ret = wait_on_page_writeback_range(mapping,
138 offset >> PAGE_CACHE_SHIFT,
139 endbyte >> PAGE_CACHE_SHIFT);
104 break; 140 break;
105 default: 141 default:
106 ret = -EINVAL; 142 ret = -EINVAL;
diff --git a/mm/filemap.c b/mm/filemap.c
index c1b1708cc95d..3ef20739e725 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -183,8 +183,8 @@ static int sync_page(void *word)
183 * these two operations is that if a dirty page/buffer is encountered, it must 183 * these two operations is that if a dirty page/buffer is encountered, it must
184 * be waited upon, and not just skipped over. 184 * be waited upon, and not just skipped over.
185 */ 185 */
186static int __filemap_fdatawrite_range(struct address_space *mapping, 186int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
187 loff_t start, loff_t end, int sync_mode) 187 loff_t end, int sync_mode)
188{ 188{
189 int ret; 189 int ret;
190 struct writeback_control wbc = { 190 struct writeback_control wbc = {
@@ -213,8 +213,8 @@ int filemap_fdatawrite(struct address_space *mapping)
213} 213}
214EXPORT_SYMBOL(filemap_fdatawrite); 214EXPORT_SYMBOL(filemap_fdatawrite);
215 215
216static int filemap_fdatawrite_range(struct address_space *mapping, 216static int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
217 loff_t start, loff_t end) 217 loff_t end)
218{ 218{
219 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL); 219 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
220} 220}
@@ -233,7 +233,7 @@ EXPORT_SYMBOL(filemap_flush);
233 * Wait for writeback to complete against pages indexed by start->end 233 * Wait for writeback to complete against pages indexed by start->end
234 * inclusive 234 * inclusive
235 */ 235 */
236static int wait_on_page_writeback_range(struct address_space *mapping, 236int wait_on_page_writeback_range(struct address_space *mapping,
237 pgoff_t start, pgoff_t end) 237 pgoff_t start, pgoff_t end)
238{ 238{
239 struct pagevec pvec; 239 struct pagevec pvec;