aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Zach Brown <zach.brown@oracle.com> 2005-12-15 17:28:17 -0500
committer Joel Becker <joel.becker@oracle.com> 2006-01-03 14:45:42 -0500
commit 994fc28c7b1e697ac56befe4aecabf23f0689f46 (patch)
tree da36d162e9bd077e9b5be385b28e2db90475c263
parent 7063fbf2261194f72ee75afca67b3b38b554b5fa (diff)
[PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE
readpage(), prepare_write(), and commit_write() callers are updated to understand the special return code AOP_TRUNCATED_PAGE in the style of writepage() and WRITEPAGE_ACTIVATE. AOP_TRUNCATED_PAGE tells the caller that the callee has unlocked the page and that the operation should be tried again with a new page. OCFS2 uses this to detect and work around a lock inversion in its aop methods. There should be no change in behaviour for methods that don't return AOP_TRUNCATED_PAGE.

WRITEPAGE_ACTIVATE is also prepended with AOP_ for consistency and they are made enums so that kerneldoc can be used to document their semantics.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
-rw-r--r-- drivers/block/loop.c 23
-rw-r--r-- drivers/block/rd.c 4
-rw-r--r-- fs/mpage.c 2
-rw-r--r-- include/linux/fs.h 31
-rw-r--r-- include/linux/writeback.h 6
-rw-r--r-- mm/filemap.c 73
-rw-r--r-- mm/readahead.c 15
-rw-r--r-- mm/shmem.c 2
-rw-r--r-- mm/vmscan.c 2
9 files changed, 113 insertions, 45 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 96c664af8d06..a452b13620a2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
213 struct address_space_operations *aops = mapping->a_ops; 213 struct address_space_operations *aops = mapping->a_ops;
214 pgoff_t index; 214 pgoff_t index;
215 unsigned offset, bv_offs; 215 unsigned offset, bv_offs;
216 int len, ret = 0; 216 int len, ret;
217 217
218 down(&mapping->host->i_sem); 218 down(&mapping->host->i_sem);
219 index = pos >> PAGE_CACHE_SHIFT; 219 index = pos >> PAGE_CACHE_SHIFT;
@@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
232 page = grab_cache_page(mapping, index); 232 page = grab_cache_page(mapping, index);
233 if (unlikely(!page)) 233 if (unlikely(!page))
234 goto fail; 234 goto fail;
235 if (unlikely(aops->prepare_write(file, page, offset, 235 ret = aops->prepare_write(file, page, offset,
236 offset + size))) 236 offset + size);
237 if (unlikely(ret)) {
238 if (ret == AOP_TRUNCATED_PAGE) {
239 page_cache_release(page);
240 continue;
241 }
237 goto unlock; 242 goto unlock;
243 }
238 transfer_result = lo_do_transfer(lo, WRITE, page, offset, 244 transfer_result = lo_do_transfer(lo, WRITE, page, offset,
239 bvec->bv_page, bv_offs, size, IV); 245 bvec->bv_page, bv_offs, size, IV);
240 if (unlikely(transfer_result)) { 246 if (unlikely(transfer_result)) {
@@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
251 kunmap_atomic(kaddr, KM_USER0); 257 kunmap_atomic(kaddr, KM_USER0);
252 } 258 }
253 flush_dcache_page(page); 259 flush_dcache_page(page);
254 if (unlikely(aops->commit_write(file, page, offset, 260 ret = aops->commit_write(file, page, offset,
255 offset + size))) 261 offset + size);
262 if (unlikely(ret)) {
263 if (ret == AOP_TRUNCATED_PAGE) {
264 page_cache_release(page);
265 continue;
266 }
256 goto unlock; 267 goto unlock;
268 }
257 if (unlikely(transfer_result)) 269 if (unlikely(transfer_result))
258 goto unlock; 270 goto unlock;
259 bv_offs += size; 271 bv_offs += size;
@@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
264 unlock_page(page); 276 unlock_page(page);
265 page_cache_release(page); 277 page_cache_release(page);
266 } 278 }
279 ret = 0;
267out: 280out:
268 up(&mapping->host->i_sem); 281 up(&mapping->host->i_sem);
269 return ret; 282 return ret;
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 68c60a5bcdab..ffd6abd6d5a0 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page,
154 154
155/* 155/*
156 * ->writepage to the the blockdev's mapping has to redirty the page so that the 156 * ->writepage to the the blockdev's mapping has to redirty the page so that the
157 * VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM 157 * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM
158 * won't try to (pointlessly) write the page again for a while. 158 * won't try to (pointlessly) write the page again for a while.
159 * 159 *
160 * Really, these pages should not be on the LRU at all. 160 * Really, these pages should not be on the LRU at all.
@@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
165 make_page_uptodate(page); 165 make_page_uptodate(page);
166 SetPageDirty(page); 166 SetPageDirty(page);
167 if (wbc->for_reclaim) 167 if (wbc->for_reclaim)
168 return WRITEPAGE_ACTIVATE; 168 return AOP_WRITEPAGE_ACTIVATE;
169 unlock_page(page); 169 unlock_page(page);
170 return 0; 170 return 0;
171} 171}
diff --git a/fs/mpage.c b/fs/mpage.c
index c5adcdddf3cc..f1d2d02bd4c8 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -721,7 +721,7 @@ retry:
721 &last_block_in_bio, &ret, wbc, 721 &last_block_in_bio, &ret, wbc,
722 page->mapping->a_ops->writepage); 722 page->mapping->a_ops->writepage);
723 } 723 }
724 if (unlikely(ret == WRITEPAGE_ACTIVATE)) 724 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
725 unlock_page(page); 725 unlock_page(page);
726 if (ret || (--(wbc->nr_to_write) <= 0)) 726 if (ret || (--(wbc->nr_to_write) <= 0))
727 done = 1; 727 done = 1;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc35b6ac778d..ed9a41a71e8b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -302,6 +302,37 @@ struct iattr {
302 */ 302 */
303#include <linux/quota.h> 303#include <linux/quota.h>
304 304
305/**
306 * enum positive_aop_returns - aop return codes with specific semantics
307 *
308 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
309 * completed, that the page is still locked, and
310 * should be considered active. The VM uses this hint
311 * to return the page to the active list -- it won't
312 * be a candidate for writeback again in the near
313 * future. Other callers must be careful to unlock
314 * the page if they get this return. Returned by
315 * writepage();
316 *
317 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
318 * unlocked it and the page might have been truncated.
319 * The caller should back up to acquiring a new page and
320 * trying again. The aop will be taking reasonable
321 * precautions not to livelock. If the caller held a page
322 * reference, it should drop it before retrying. Returned
323 * by readpage(), prepare_write(), and commit_write().
324 *
325 * address_space_operation functions return these large constants to indicate
326 * special semantics to the caller. These are much larger than the bytes in a
327 * page to allow for functions that return the number of bytes operated on in a
328 * given page.
329 */
330
331enum positive_aop_returns {
332 AOP_WRITEPAGE_ACTIVATE = 0x80000,
333 AOP_TRUNCATED_PAGE = 0x80001,
334};
335
305/* 336/*
306 * oh the beauties of C type declarations. 337 * oh the beauties of C type declarations.
307 */ 338 */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 343d883d69c5..64a36ba43b2f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -60,12 +60,6 @@ struct writeback_control {
60}; 60};
61 61
62/* 62/*
63 * ->writepage() return values (make these much larger than a pagesize, in
64 * case some fs is returning number-of-bytes-written from writepage)
65 */
66#define WRITEPAGE_ACTIVATE 0x80000 /* IO was not started: activate page */
67
68/*
69 * fs/fs-writeback.c 63 * fs/fs-writeback.c
70 */ 64 */
71void writeback_inodes(struct writeback_control *wbc); 65void writeback_inodes(struct writeback_control *wbc);
diff --git a/mm/filemap.c b/mm/filemap.c
index 33a28bfde158..6e1d08a2b8b9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -831,8 +831,13 @@ readpage:
831 /* Start the actual read. The read will unlock the page. */ 831 /* Start the actual read. The read will unlock the page. */
832 error = mapping->a_ops->readpage(filp, page); 832 error = mapping->a_ops->readpage(filp, page);
833 833
834 if (unlikely(error)) 834 if (unlikely(error)) {
835 if (error == AOP_TRUNCATED_PAGE) {
836 page_cache_release(page);
837 goto find_page;
838 }
835 goto readpage_error; 839 goto readpage_error;
840 }
836 841
837 if (!PageUptodate(page)) { 842 if (!PageUptodate(page)) {
838 lock_page(page); 843 lock_page(page);
@@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1152{ 1157{
1153 struct address_space *mapping = file->f_mapping; 1158 struct address_space *mapping = file->f_mapping;
1154 struct page *page; 1159 struct page *page;
1155 int error; 1160 int ret;
1156 1161
1157 page = page_cache_alloc_cold(mapping); 1162 do {
1158 if (!page) 1163 page = page_cache_alloc_cold(mapping);
1159 return -ENOMEM; 1164 if (!page)
1165 return -ENOMEM;
1166
1167 ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
1168 if (ret == 0)
1169 ret = mapping->a_ops->readpage(file, page);
1170 else if (ret == -EEXIST)
1171 ret = 0; /* losing race to add is OK */
1160 1172
1161 error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
1162 if (!error) {
1163 error = mapping->a_ops->readpage(file, page);
1164 page_cache_release(page); 1173 page_cache_release(page);
1165 return error;
1166 }
1167 1174
1168 /* 1175 } while (ret == AOP_TRUNCATED_PAGE);
1169 * We arrive here in the unlikely event that someone 1176
1170 * raced with us and added our page to the cache first 1177 return ret;
1171 * or we are out of memory for radix-tree nodes.
1172 */
1173 page_cache_release(page);
1174 return error == -EEXIST ? 0 : error;
1175} 1178}
1176 1179
1177#define MMAP_LOTSAMISS (100) 1180#define MMAP_LOTSAMISS (100)
@@ -1331,10 +1334,14 @@ page_not_uptodate:
1331 goto success; 1334 goto success;
1332 } 1335 }
1333 1336
1334 if (!mapping->a_ops->readpage(file, page)) { 1337 error = mapping->a_ops->readpage(file, page);
1338 if (!error) {
1335 wait_on_page_locked(page); 1339 wait_on_page_locked(page);
1336 if (PageUptodate(page)) 1340 if (PageUptodate(page))
1337 goto success; 1341 goto success;
1342 } else if (error == AOP_TRUNCATED_PAGE) {
1343 page_cache_release(page);
1344 goto retry_find;
1338 } 1345 }
1339 1346
1340 /* 1347 /*
@@ -1358,10 +1365,14 @@ page_not_uptodate:
1358 goto success; 1365 goto success;
1359 } 1366 }
1360 ClearPageError(page); 1367 ClearPageError(page);
1361 if (!mapping->a_ops->readpage(file, page)) { 1368 error = mapping->a_ops->readpage(file, page);
1369 if (!error) {
1362 wait_on_page_locked(page); 1370 wait_on_page_locked(page);
1363 if (PageUptodate(page)) 1371 if (PageUptodate(page))
1364 goto success; 1372 goto success;
1373 } else if (error == AOP_TRUNCATED_PAGE) {
1374 page_cache_release(page);
1375 goto retry_find;
1365 } 1376 }
1366 1377
1367 /* 1378 /*
@@ -1444,10 +1455,14 @@ page_not_uptodate:
1444 goto success; 1455 goto success;
1445 } 1456 }
1446 1457
1447 if (!mapping->a_ops->readpage(file, page)) { 1458 error = mapping->a_ops->readpage(file, page);
1459 if (!error) {
1448 wait_on_page_locked(page); 1460 wait_on_page_locked(page);
1449 if (PageUptodate(page)) 1461 if (PageUptodate(page))
1450 goto success; 1462 goto success;
1463 } else if (error == AOP_TRUNCATED_PAGE) {
1464 page_cache_release(page);
1465 goto retry_find;
1451 } 1466 }
1452 1467
1453 /* 1468 /*
@@ -1470,10 +1485,14 @@ page_not_uptodate:
1470 } 1485 }
1471 1486
1472 ClearPageError(page); 1487 ClearPageError(page);
1473 if (!mapping->a_ops->readpage(file, page)) { 1488 error = mapping->a_ops->readpage(file, page);
1489 if (!error) {
1474 wait_on_page_locked(page); 1490 wait_on_page_locked(page);
1475 if (PageUptodate(page)) 1491 if (PageUptodate(page))
1476 goto success; 1492 goto success;
1493 } else if (error == AOP_TRUNCATED_PAGE) {
1494 page_cache_release(page);
1495 goto retry_find;
1477 } 1496 }
1478 1497
1479 /* 1498 /*
@@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
1934 status = a_ops->prepare_write(file, page, offset, offset+bytes); 1953 status = a_ops->prepare_write(file, page, offset, offset+bytes);
1935 if (unlikely(status)) { 1954 if (unlikely(status)) {
1936 loff_t isize = i_size_read(inode); 1955 loff_t isize = i_size_read(inode);
1956
1957 if (status != AOP_TRUNCATED_PAGE)
1958 unlock_page(page);
1959 page_cache_release(page);
1960 if (status == AOP_TRUNCATED_PAGE)
1961 continue;
1937 /* 1962 /*
1938 * prepare_write() may have instantiated a few blocks 1963 * prepare_write() may have instantiated a few blocks
1939 * outside i_size. Trim these off again. 1964 * outside i_size. Trim these off again.
1940 */ 1965 */
1941 unlock_page(page);
1942 page_cache_release(page);
1943 if (pos + bytes > isize) 1966 if (pos + bytes > isize)
1944 vmtruncate(inode, isize); 1967 vmtruncate(inode, isize);
1945 break; 1968 break;
@@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
1952 cur_iov, iov_base, bytes); 1975 cur_iov, iov_base, bytes);
1953 flush_dcache_page(page); 1976 flush_dcache_page(page);
1954 status = a_ops->commit_write(file, page, offset, offset+bytes); 1977 status = a_ops->commit_write(file, page, offset, offset+bytes);
1978 if (status == AOP_TRUNCATED_PAGE) {
1979 page_cache_release(page);
1980 continue;
1981 }
1955 if (likely(copied > 0)) { 1982 if (likely(copied > 0)) {
1956 if (!status) 1983 if (!status)
1957 status = copied; 1984 status = copied;
diff --git a/mm/readahead.c b/mm/readahead.c
index 72e7adbb87c7..8d6eeaaa6296 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
158{ 158{
159 unsigned page_idx; 159 unsigned page_idx;
160 struct pagevec lru_pvec; 160 struct pagevec lru_pvec;
161 int ret = 0; 161 int ret;
162 162
163 if (mapping->a_ops->readpages) { 163 if (mapping->a_ops->readpages) {
164 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); 164 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
@@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp,
171 list_del(&page->lru); 171 list_del(&page->lru);
172 if (!add_to_page_cache(page, mapping, 172 if (!add_to_page_cache(page, mapping,
173 page->index, GFP_KERNEL)) { 173 page->index, GFP_KERNEL)) {
174 mapping->a_ops->readpage(filp, page); 174 ret = mapping->a_ops->readpage(filp, page);
175 if (!pagevec_add(&lru_pvec, page)) 175 if (ret != AOP_TRUNCATED_PAGE) {
176 __pagevec_lru_add(&lru_pvec); 176 if (!pagevec_add(&lru_pvec, page))
177 } else { 177 __pagevec_lru_add(&lru_pvec);
178 page_cache_release(page); 178 continue;
179 } /* else fall through to release */
179 } 180 }
181 page_cache_release(page);
180 } 182 }
181 pagevec_lru_add(&lru_pvec); 183 pagevec_lru_add(&lru_pvec);
184 ret = 0;
182out: 185out:
183 return ret; 186 return ret;
184} 187}
diff --git a/mm/shmem.c b/mm/shmem.c
index dc25565a61e9..d9fc277940da 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -855,7 +855,7 @@ unlock:
855 swap_free(swap); 855 swap_free(swap);
856redirty: 856redirty:
857 set_page_dirty(page); 857 set_page_dirty(page);
858 return WRITEPAGE_ACTIVATE; /* Return with the page locked */ 858 return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */
859} 859}
860 860
861#ifdef CONFIG_NUMA 861#ifdef CONFIG_NUMA
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b0cd81c32de6..795a050fe471 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
367 res = mapping->a_ops->writepage(page, &wbc); 367 res = mapping->a_ops->writepage(page, &wbc);
368 if (res < 0) 368 if (res < 0)
369 handle_write_error(mapping, page, res); 369 handle_write_error(mapping, page, res);
370 if (res == WRITEPAGE_ACTIVATE) { 370 if (res == AOP_WRITEPAGE_ACTIVATE) {
371 ClearPageReclaim(page); 371 ClearPageReclaim(page);
372 return PAGE_ACTIVATE; 372 return PAGE_ACTIVATE;
373 } 373 }