diff options
author | Zach Brown <zach.brown@oracle.com> | 2005-12-15 17:28:17 -0500 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2006-01-03 14:45:42 -0500 |
commit | 994fc28c7b1e697ac56befe4aecabf23f0689f46 (patch) | |
tree | da36d162e9bd077e9b5be385b28e2db90475c263 | |
parent | 7063fbf2261194f72ee75afca67b3b38b554b5fa (diff) |
[PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE
readpage(), prepare_write(), and commit_write() callers are updated to
understand the special return code AOP_TRUNCATED_PAGE, in the same way that
writepage() callers handle WRITEPAGE_ACTIVATE. AOP_TRUNCATED_PAGE tells the caller that
the callee has unlocked the page and that the operation should be tried again
with a new page. OCFS2 uses this to detect and work around a lock inversion in
its aop methods. There should be no change in behaviour for methods that don't
return AOP_TRUNCATED_PAGE.
WRITEPAGE_ACTIVATE is also renamed to AOP_WRITEPAGE_ACTIVATE for consistency, and
both return codes are now enum values so that kerneldoc can be used to document their semantics.
Signed-off-by: Zach Brown <zach.brown@oracle.com>
-rw-r--r-- | drivers/block/loop.c | 23 | ||||
-rw-r--r-- | drivers/block/rd.c | 4 | ||||
-rw-r--r-- | fs/mpage.c | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 31 | ||||
-rw-r--r-- | include/linux/writeback.h | 6 | ||||
-rw-r--r-- | mm/filemap.c | 73 | ||||
-rw-r--r-- | mm/readahead.c | 15 | ||||
-rw-r--r-- | mm/shmem.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
9 files changed, 113 insertions, 45 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 96c664af8d06..a452b13620a2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | |||
213 | struct address_space_operations *aops = mapping->a_ops; | 213 | struct address_space_operations *aops = mapping->a_ops; |
214 | pgoff_t index; | 214 | pgoff_t index; |
215 | unsigned offset, bv_offs; | 215 | unsigned offset, bv_offs; |
216 | int len, ret = 0; | 216 | int len, ret; |
217 | 217 | ||
218 | down(&mapping->host->i_sem); | 218 | down(&mapping->host->i_sem); |
219 | index = pos >> PAGE_CACHE_SHIFT; | 219 | index = pos >> PAGE_CACHE_SHIFT; |
@@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | |||
232 | page = grab_cache_page(mapping, index); | 232 | page = grab_cache_page(mapping, index); |
233 | if (unlikely(!page)) | 233 | if (unlikely(!page)) |
234 | goto fail; | 234 | goto fail; |
235 | if (unlikely(aops->prepare_write(file, page, offset, | 235 | ret = aops->prepare_write(file, page, offset, |
236 | offset + size))) | 236 | offset + size); |
237 | if (unlikely(ret)) { | ||
238 | if (ret == AOP_TRUNCATED_PAGE) { | ||
239 | page_cache_release(page); | ||
240 | continue; | ||
241 | } | ||
237 | goto unlock; | 242 | goto unlock; |
243 | } | ||
238 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, | 244 | transfer_result = lo_do_transfer(lo, WRITE, page, offset, |
239 | bvec->bv_page, bv_offs, size, IV); | 245 | bvec->bv_page, bv_offs, size, IV); |
240 | if (unlikely(transfer_result)) { | 246 | if (unlikely(transfer_result)) { |
@@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | |||
251 | kunmap_atomic(kaddr, KM_USER0); | 257 | kunmap_atomic(kaddr, KM_USER0); |
252 | } | 258 | } |
253 | flush_dcache_page(page); | 259 | flush_dcache_page(page); |
254 | if (unlikely(aops->commit_write(file, page, offset, | 260 | ret = aops->commit_write(file, page, offset, |
255 | offset + size))) | 261 | offset + size); |
262 | if (unlikely(ret)) { | ||
263 | if (ret == AOP_TRUNCATED_PAGE) { | ||
264 | page_cache_release(page); | ||
265 | continue; | ||
266 | } | ||
256 | goto unlock; | 267 | goto unlock; |
268 | } | ||
257 | if (unlikely(transfer_result)) | 269 | if (unlikely(transfer_result)) |
258 | goto unlock; | 270 | goto unlock; |
259 | bv_offs += size; | 271 | bv_offs += size; |
@@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, | |||
264 | unlock_page(page); | 276 | unlock_page(page); |
265 | page_cache_release(page); | 277 | page_cache_release(page); |
266 | } | 278 | } |
279 | ret = 0; | ||
267 | out: | 280 | out: |
268 | up(&mapping->host->i_sem); | 281 | up(&mapping->host->i_sem); |
269 | return ret; | 282 | return ret; |
diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 68c60a5bcdab..ffd6abd6d5a0 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c | |||
@@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page, | |||
154 | 154 | ||
155 | /* | 155 | /* |
156 | * ->writepage to the the blockdev's mapping has to redirty the page so that the | 156 | * ->writepage to the the blockdev's mapping has to redirty the page so that the |
157 | * VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM | 157 | * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM |
158 | * won't try to (pointlessly) write the page again for a while. | 158 | * won't try to (pointlessly) write the page again for a while. |
159 | * | 159 | * |
160 | * Really, these pages should not be on the LRU at all. | 160 | * Really, these pages should not be on the LRU at all. |
@@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) | |||
165 | make_page_uptodate(page); | 165 | make_page_uptodate(page); |
166 | SetPageDirty(page); | 166 | SetPageDirty(page); |
167 | if (wbc->for_reclaim) | 167 | if (wbc->for_reclaim) |
168 | return WRITEPAGE_ACTIVATE; | 168 | return AOP_WRITEPAGE_ACTIVATE; |
169 | unlock_page(page); | 169 | unlock_page(page); |
170 | return 0; | 170 | return 0; |
171 | } | 171 | } |
diff --git a/fs/mpage.c b/fs/mpage.c index c5adcdddf3cc..f1d2d02bd4c8 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -721,7 +721,7 @@ retry: | |||
721 | &last_block_in_bio, &ret, wbc, | 721 | &last_block_in_bio, &ret, wbc, |
722 | page->mapping->a_ops->writepage); | 722 | page->mapping->a_ops->writepage); |
723 | } | 723 | } |
724 | if (unlikely(ret == WRITEPAGE_ACTIVATE)) | 724 | if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) |
725 | unlock_page(page); | 725 | unlock_page(page); |
726 | if (ret || (--(wbc->nr_to_write) <= 0)) | 726 | if (ret || (--(wbc->nr_to_write) <= 0)) |
727 | done = 1; | 727 | done = 1; |
diff --git a/include/linux/fs.h b/include/linux/fs.h index cc35b6ac778d..ed9a41a71e8b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -302,6 +302,37 @@ struct iattr { | |||
302 | */ | 302 | */ |
303 | #include <linux/quota.h> | 303 | #include <linux/quota.h> |
304 | 304 | ||
305 | /** | ||
306 | * enum positive_aop_returns - aop return codes with specific semantics | ||
307 | * | ||
308 | * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has | ||
309 | * completed, that the page is still locked, and | ||
310 | * should be considered active. The VM uses this hint | ||
311 | * to return the page to the active list -- it won't | ||
312 | * be a candidate for writeback again in the near | ||
313 | * future. Other callers must be careful to unlock | ||
314 | * the page if they get this return. Returned by | ||
315 | * writepage(); | ||
316 | * | ||
317 | * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has | ||
318 | * unlocked it and the page might have been truncated. | ||
319 | * The caller should back up to acquiring a new page and | ||
320 | * trying again. The aop will be taking reasonable | ||
321 | * precautions not to livelock. If the caller held a page | ||
322 | * reference, it should drop it before retrying. Returned | ||
323 | * by readpage(), prepare_write(), and commit_write(). | ||
324 | * | ||
325 | * address_space_operation functions return these large constants to indicate | ||
326 | * special semantics to the caller. These are much larger than the bytes in a | ||
327 | * page to allow for functions that return the number of bytes operated on in a | ||
328 | * given page. | ||
329 | */ | ||
330 | |||
331 | enum positive_aop_returns { | ||
332 | AOP_WRITEPAGE_ACTIVATE = 0x80000, | ||
333 | AOP_TRUNCATED_PAGE = 0x80001, | ||
334 | }; | ||
335 | |||
305 | /* | 336 | /* |
306 | * oh the beauties of C type declarations. | 337 | * oh the beauties of C type declarations. |
307 | */ | 338 | */ |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 343d883d69c5..64a36ba43b2f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -60,12 +60,6 @@ struct writeback_control { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * ->writepage() return values (make these much larger than a pagesize, in | ||
64 | * case some fs is returning number-of-bytes-written from writepage) | ||
65 | */ | ||
66 | #define WRITEPAGE_ACTIVATE 0x80000 /* IO was not started: activate page */ | ||
67 | |||
68 | /* | ||
69 | * fs/fs-writeback.c | 63 | * fs/fs-writeback.c |
70 | */ | 64 | */ |
71 | void writeback_inodes(struct writeback_control *wbc); | 65 | void writeback_inodes(struct writeback_control *wbc); |
diff --git a/mm/filemap.c b/mm/filemap.c index 33a28bfde158..6e1d08a2b8b9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -831,8 +831,13 @@ readpage: | |||
831 | /* Start the actual read. The read will unlock the page. */ | 831 | /* Start the actual read. The read will unlock the page. */ |
832 | error = mapping->a_ops->readpage(filp, page); | 832 | error = mapping->a_ops->readpage(filp, page); |
833 | 833 | ||
834 | if (unlikely(error)) | 834 | if (unlikely(error)) { |
835 | if (error == AOP_TRUNCATED_PAGE) { | ||
836 | page_cache_release(page); | ||
837 | goto find_page; | ||
838 | } | ||
835 | goto readpage_error; | 839 | goto readpage_error; |
840 | } | ||
836 | 841 | ||
837 | if (!PageUptodate(page)) { | 842 | if (!PageUptodate(page)) { |
838 | lock_page(page); | 843 | lock_page(page); |
@@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset) | |||
1152 | { | 1157 | { |
1153 | struct address_space *mapping = file->f_mapping; | 1158 | struct address_space *mapping = file->f_mapping; |
1154 | struct page *page; | 1159 | struct page *page; |
1155 | int error; | 1160 | int ret; |
1156 | 1161 | ||
1157 | page = page_cache_alloc_cold(mapping); | 1162 | do { |
1158 | if (!page) | 1163 | page = page_cache_alloc_cold(mapping); |
1159 | return -ENOMEM; | 1164 | if (!page) |
1165 | return -ENOMEM; | ||
1166 | |||
1167 | ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL); | ||
1168 | if (ret == 0) | ||
1169 | ret = mapping->a_ops->readpage(file, page); | ||
1170 | else if (ret == -EEXIST) | ||
1171 | ret = 0; /* losing race to add is OK */ | ||
1160 | 1172 | ||
1161 | error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL); | ||
1162 | if (!error) { | ||
1163 | error = mapping->a_ops->readpage(file, page); | ||
1164 | page_cache_release(page); | 1173 | page_cache_release(page); |
1165 | return error; | ||
1166 | } | ||
1167 | 1174 | ||
1168 | /* | 1175 | } while (ret == AOP_TRUNCATED_PAGE); |
1169 | * We arrive here in the unlikely event that someone | 1176 | |
1170 | * raced with us and added our page to the cache first | 1177 | return ret; |
1171 | * or we are out of memory for radix-tree nodes. | ||
1172 | */ | ||
1173 | page_cache_release(page); | ||
1174 | return error == -EEXIST ? 0 : error; | ||
1175 | } | 1178 | } |
1176 | 1179 | ||
1177 | #define MMAP_LOTSAMISS (100) | 1180 | #define MMAP_LOTSAMISS (100) |
@@ -1331,10 +1334,14 @@ page_not_uptodate: | |||
1331 | goto success; | 1334 | goto success; |
1332 | } | 1335 | } |
1333 | 1336 | ||
1334 | if (!mapping->a_ops->readpage(file, page)) { | 1337 | error = mapping->a_ops->readpage(file, page); |
1338 | if (!error) { | ||
1335 | wait_on_page_locked(page); | 1339 | wait_on_page_locked(page); |
1336 | if (PageUptodate(page)) | 1340 | if (PageUptodate(page)) |
1337 | goto success; | 1341 | goto success; |
1342 | } else if (error == AOP_TRUNCATED_PAGE) { | ||
1343 | page_cache_release(page); | ||
1344 | goto retry_find; | ||
1338 | } | 1345 | } |
1339 | 1346 | ||
1340 | /* | 1347 | /* |
@@ -1358,10 +1365,14 @@ page_not_uptodate: | |||
1358 | goto success; | 1365 | goto success; |
1359 | } | 1366 | } |
1360 | ClearPageError(page); | 1367 | ClearPageError(page); |
1361 | if (!mapping->a_ops->readpage(file, page)) { | 1368 | error = mapping->a_ops->readpage(file, page); |
1369 | if (!error) { | ||
1362 | wait_on_page_locked(page); | 1370 | wait_on_page_locked(page); |
1363 | if (PageUptodate(page)) | 1371 | if (PageUptodate(page)) |
1364 | goto success; | 1372 | goto success; |
1373 | } else if (error == AOP_TRUNCATED_PAGE) { | ||
1374 | page_cache_release(page); | ||
1375 | goto retry_find; | ||
1365 | } | 1376 | } |
1366 | 1377 | ||
1367 | /* | 1378 | /* |
@@ -1444,10 +1455,14 @@ page_not_uptodate: | |||
1444 | goto success; | 1455 | goto success; |
1445 | } | 1456 | } |
1446 | 1457 | ||
1447 | if (!mapping->a_ops->readpage(file, page)) { | 1458 | error = mapping->a_ops->readpage(file, page); |
1459 | if (!error) { | ||
1448 | wait_on_page_locked(page); | 1460 | wait_on_page_locked(page); |
1449 | if (PageUptodate(page)) | 1461 | if (PageUptodate(page)) |
1450 | goto success; | 1462 | goto success; |
1463 | } else if (error == AOP_TRUNCATED_PAGE) { | ||
1464 | page_cache_release(page); | ||
1465 | goto retry_find; | ||
1451 | } | 1466 | } |
1452 | 1467 | ||
1453 | /* | 1468 | /* |
@@ -1470,10 +1485,14 @@ page_not_uptodate: | |||
1470 | } | 1485 | } |
1471 | 1486 | ||
1472 | ClearPageError(page); | 1487 | ClearPageError(page); |
1473 | if (!mapping->a_ops->readpage(file, page)) { | 1488 | error = mapping->a_ops->readpage(file, page); |
1489 | if (!error) { | ||
1474 | wait_on_page_locked(page); | 1490 | wait_on_page_locked(page); |
1475 | if (PageUptodate(page)) | 1491 | if (PageUptodate(page)) |
1476 | goto success; | 1492 | goto success; |
1493 | } else if (error == AOP_TRUNCATED_PAGE) { | ||
1494 | page_cache_release(page); | ||
1495 | goto retry_find; | ||
1477 | } | 1496 | } |
1478 | 1497 | ||
1479 | /* | 1498 | /* |
@@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
1934 | status = a_ops->prepare_write(file, page, offset, offset+bytes); | 1953 | status = a_ops->prepare_write(file, page, offset, offset+bytes); |
1935 | if (unlikely(status)) { | 1954 | if (unlikely(status)) { |
1936 | loff_t isize = i_size_read(inode); | 1955 | loff_t isize = i_size_read(inode); |
1956 | |||
1957 | if (status != AOP_TRUNCATED_PAGE) | ||
1958 | unlock_page(page); | ||
1959 | page_cache_release(page); | ||
1960 | if (status == AOP_TRUNCATED_PAGE) | ||
1961 | continue; | ||
1937 | /* | 1962 | /* |
1938 | * prepare_write() may have instantiated a few blocks | 1963 | * prepare_write() may have instantiated a few blocks |
1939 | * outside i_size. Trim these off again. | 1964 | * outside i_size. Trim these off again. |
1940 | */ | 1965 | */ |
1941 | unlock_page(page); | ||
1942 | page_cache_release(page); | ||
1943 | if (pos + bytes > isize) | 1966 | if (pos + bytes > isize) |
1944 | vmtruncate(inode, isize); | 1967 | vmtruncate(inode, isize); |
1945 | break; | 1968 | break; |
@@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
1952 | cur_iov, iov_base, bytes); | 1975 | cur_iov, iov_base, bytes); |
1953 | flush_dcache_page(page); | 1976 | flush_dcache_page(page); |
1954 | status = a_ops->commit_write(file, page, offset, offset+bytes); | 1977 | status = a_ops->commit_write(file, page, offset, offset+bytes); |
1978 | if (status == AOP_TRUNCATED_PAGE) { | ||
1979 | page_cache_release(page); | ||
1980 | continue; | ||
1981 | } | ||
1955 | if (likely(copied > 0)) { | 1982 | if (likely(copied > 0)) { |
1956 | if (!status) | 1983 | if (!status) |
1957 | status = copied; | 1984 | status = copied; |
diff --git a/mm/readahead.c b/mm/readahead.c index 72e7adbb87c7..8d6eeaaa6296 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp, | |||
158 | { | 158 | { |
159 | unsigned page_idx; | 159 | unsigned page_idx; |
160 | struct pagevec lru_pvec; | 160 | struct pagevec lru_pvec; |
161 | int ret = 0; | 161 | int ret; |
162 | 162 | ||
163 | if (mapping->a_ops->readpages) { | 163 | if (mapping->a_ops->readpages) { |
164 | ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); | 164 | ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); |
@@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp, | |||
171 | list_del(&page->lru); | 171 | list_del(&page->lru); |
172 | if (!add_to_page_cache(page, mapping, | 172 | if (!add_to_page_cache(page, mapping, |
173 | page->index, GFP_KERNEL)) { | 173 | page->index, GFP_KERNEL)) { |
174 | mapping->a_ops->readpage(filp, page); | 174 | ret = mapping->a_ops->readpage(filp, page); |
175 | if (!pagevec_add(&lru_pvec, page)) | 175 | if (ret != AOP_TRUNCATED_PAGE) { |
176 | __pagevec_lru_add(&lru_pvec); | 176 | if (!pagevec_add(&lru_pvec, page)) |
177 | } else { | 177 | __pagevec_lru_add(&lru_pvec); |
178 | page_cache_release(page); | 178 | continue; |
179 | } /* else fall through to release */ | ||
179 | } | 180 | } |
181 | page_cache_release(page); | ||
180 | } | 182 | } |
181 | pagevec_lru_add(&lru_pvec); | 183 | pagevec_lru_add(&lru_pvec); |
184 | ret = 0; | ||
182 | out: | 185 | out: |
183 | return ret; | 186 | return ret; |
184 | } | 187 | } |
diff --git a/mm/shmem.c b/mm/shmem.c index dc25565a61e9..d9fc277940da 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -855,7 +855,7 @@ unlock: | |||
855 | swap_free(swap); | 855 | swap_free(swap); |
856 | redirty: | 856 | redirty: |
857 | set_page_dirty(page); | 857 | set_page_dirty(page); |
858 | return WRITEPAGE_ACTIVATE; /* Return with the page locked */ | 858 | return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */ |
859 | } | 859 | } |
860 | 860 | ||
861 | #ifdef CONFIG_NUMA | 861 | #ifdef CONFIG_NUMA |
diff --git a/mm/vmscan.c b/mm/vmscan.c index b0cd81c32de6..795a050fe471 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) | |||
367 | res = mapping->a_ops->writepage(page, &wbc); | 367 | res = mapping->a_ops->writepage(page, &wbc); |
368 | if (res < 0) | 368 | if (res < 0) |
369 | handle_write_error(mapping, page, res); | 369 | handle_write_error(mapping, page, res); |
370 | if (res == WRITEPAGE_ACTIVATE) { | 370 | if (res == AOP_WRITEPAGE_ACTIVATE) { |
371 | ClearPageReclaim(page); | 371 | ClearPageReclaim(page); |
372 | return PAGE_ACTIVATE; | 372 | return PAGE_ACTIVATE; |
373 | } | 373 | } |