diff options
Diffstat (limited to 'fs/nilfs2/page.c')
-rw-r--r-- | fs/nilfs2/page.c | 540 |
1 files changed, 540 insertions, 0 deletions
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c new file mode 100644 index 000000000000..1bfbba9c0e9a --- /dev/null +++ b/fs/nilfs2/page.c | |||
@@ -0,0 +1,540 @@ | |||
1 | /* | ||
2 | * page.c - buffer/page management specific to NILFS | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | ||
21 | * Seiji Kihara <kihara@osrg.net>. | ||
22 | */ | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | #include <linux/writeback.h> | ||
26 | #include <linux/swap.h> | ||
27 | #include <linux/bitops.h> | ||
28 | #include <linux/page-flags.h> | ||
29 | #include <linux/list.h> | ||
30 | #include <linux/highmem.h> | ||
31 | #include <linux/pagevec.h> | ||
32 | #include "nilfs.h" | ||
33 | #include "page.h" | ||
34 | #include "mdt.h" | ||
35 | |||
36 | |||
/*
 * Buffer state bits that are carried over from a source buffer head to
 * its copy.  nilfs_copy_buffer() and nilfs_copy_page() mask the source
 * b_state with this value before storing it in the destination.
 */
#define NILFS_BUFFER_INHERENT_BITS		\
	((1UL << BH_Uptodate) |			\
	 (1UL << BH_Mapped) |			\
	 (1UL << BH_NILFS_Node) |		\
	 (1UL << BH_NILFS_Volatile) |		\
	 (1UL << BH_NILFS_Allocated))
40 | |||
41 | static struct buffer_head * | ||
42 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, | ||
43 | int blkbits, unsigned long b_state) | ||
44 | |||
45 | { | ||
46 | unsigned long first_block; | ||
47 | struct buffer_head *bh; | ||
48 | |||
49 | if (!page_has_buffers(page)) | ||
50 | create_empty_buffers(page, 1 << blkbits, b_state); | ||
51 | |||
52 | first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); | ||
53 | bh = nilfs_page_get_nth_block(page, block - first_block); | ||
54 | |||
55 | touch_buffer(bh); | ||
56 | wait_on_buffer(bh); | ||
57 | return bh; | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * Since the page cache of B-tree node pages or data page cache of pseudo | ||
62 | * inodes does not have a valid mapping->host pointer, calling | ||
63 | * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; | ||
64 | * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). | ||
65 | * To avoid this problem, the old style mark_buffer_dirty() is used instead. | ||
66 | */ | ||
67 | void nilfs_mark_buffer_dirty(struct buffer_head *bh) | ||
68 | { | ||
69 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | ||
70 | __set_page_dirty_nobuffers(bh->b_page); | ||
71 | } | ||
72 | |||
73 | struct buffer_head *nilfs_grab_buffer(struct inode *inode, | ||
74 | struct address_space *mapping, | ||
75 | unsigned long blkoff, | ||
76 | unsigned long b_state) | ||
77 | { | ||
78 | int blkbits = inode->i_blkbits; | ||
79 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); | ||
80 | struct page *page, *opage; | ||
81 | struct buffer_head *bh, *obh; | ||
82 | |||
83 | page = grab_cache_page(mapping, index); | ||
84 | if (unlikely(!page)) | ||
85 | return NULL; | ||
86 | |||
87 | bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); | ||
88 | if (unlikely(!bh)) { | ||
89 | unlock_page(page); | ||
90 | page_cache_release(page); | ||
91 | return NULL; | ||
92 | } | ||
93 | if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { | ||
94 | /* | ||
95 | * Shadow page cache uses assoc_mapping to point its original | ||
96 | * page cache. The following code tries the original cache | ||
97 | * if the given cache is a shadow and it didn't hit. | ||
98 | */ | ||
99 | opage = find_lock_page(mapping->assoc_mapping, index); | ||
100 | if (!opage) | ||
101 | return bh; | ||
102 | |||
103 | obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, | ||
104 | b_state); | ||
105 | if (buffer_uptodate(obh)) { | ||
106 | nilfs_copy_buffer(bh, obh); | ||
107 | if (buffer_dirty(obh)) { | ||
108 | nilfs_mark_buffer_dirty(bh); | ||
109 | if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) | ||
110 | nilfs_mdt_mark_dirty(inode); | ||
111 | } | ||
112 | } | ||
113 | brelse(obh); | ||
114 | unlock_page(opage); | ||
115 | page_cache_release(opage); | ||
116 | } | ||
117 | return bh; | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * nilfs_forget_buffer - discard dirty state | ||
122 | * @inode: owner inode of the buffer | ||
123 | * @bh: buffer head of the buffer to be discarded | ||
124 | */ | ||
125 | void nilfs_forget_buffer(struct buffer_head *bh) | ||
126 | { | ||
127 | struct page *page = bh->b_page; | ||
128 | |||
129 | lock_buffer(bh); | ||
130 | clear_buffer_nilfs_volatile(bh); | ||
131 | if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) | ||
132 | __nilfs_clear_page_dirty(page); | ||
133 | |||
134 | clear_buffer_uptodate(bh); | ||
135 | clear_buffer_mapped(bh); | ||
136 | bh->b_blocknr = -1; | ||
137 | ClearPageUptodate(page); | ||
138 | ClearPageMappedToDisk(page); | ||
139 | unlock_buffer(bh); | ||
140 | brelse(bh); | ||
141 | } | ||
142 | |||
143 | /** | ||
144 | * nilfs_copy_buffer -- copy buffer data and flags | ||
145 | * @dbh: destination buffer | ||
146 | * @sbh: source buffer | ||
147 | */ | ||
148 | void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) | ||
149 | { | ||
150 | void *kaddr0, *kaddr1; | ||
151 | unsigned long bits; | ||
152 | struct page *spage = sbh->b_page, *dpage = dbh->b_page; | ||
153 | struct buffer_head *bh; | ||
154 | |||
155 | kaddr0 = kmap_atomic(spage, KM_USER0); | ||
156 | kaddr1 = kmap_atomic(dpage, KM_USER1); | ||
157 | memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); | ||
158 | kunmap_atomic(kaddr1, KM_USER1); | ||
159 | kunmap_atomic(kaddr0, KM_USER0); | ||
160 | |||
161 | dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; | ||
162 | dbh->b_blocknr = sbh->b_blocknr; | ||
163 | dbh->b_bdev = sbh->b_bdev; | ||
164 | |||
165 | bh = dbh; | ||
166 | bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); | ||
167 | while ((bh = bh->b_this_page) != dbh) { | ||
168 | lock_buffer(bh); | ||
169 | bits &= bh->b_state; | ||
170 | unlock_buffer(bh); | ||
171 | } | ||
172 | if (bits & (1UL << BH_Uptodate)) | ||
173 | SetPageUptodate(dpage); | ||
174 | else | ||
175 | ClearPageUptodate(dpage); | ||
176 | if (bits & (1UL << BH_Mapped)) | ||
177 | SetPageMappedToDisk(dpage); | ||
178 | else | ||
179 | ClearPageMappedToDisk(dpage); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * nilfs_page_buffers_clean - check if a page has dirty buffers or not. | ||
184 | * @page: page to be checked | ||
185 | * | ||
186 | * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. | ||
187 | * Otherwise, it returns non-zero value. | ||
188 | */ | ||
189 | int nilfs_page_buffers_clean(struct page *page) | ||
190 | { | ||
191 | struct buffer_head *bh, *head; | ||
192 | |||
193 | bh = head = page_buffers(page); | ||
194 | do { | ||
195 | if (buffer_dirty(bh)) | ||
196 | return 0; | ||
197 | bh = bh->b_this_page; | ||
198 | } while (bh != head); | ||
199 | return 1; | ||
200 | } | ||
201 | |||
202 | void nilfs_page_bug(struct page *page) | ||
203 | { | ||
204 | struct address_space *m; | ||
205 | unsigned long ino = 0; | ||
206 | |||
207 | if (unlikely(!page)) { | ||
208 | printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); | ||
209 | return; | ||
210 | } | ||
211 | |||
212 | m = page->mapping; | ||
213 | if (m) { | ||
214 | struct inode *inode = NILFS_AS_I(m); | ||
215 | if (inode != NULL) | ||
216 | ino = inode->i_ino; | ||
217 | } | ||
218 | printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " | ||
219 | "mapping=%p ino=%lu\n", | ||
220 | page, atomic_read(&page->_count), | ||
221 | (unsigned long long)page->index, page->flags, m, ino); | ||
222 | |||
223 | if (page_has_buffers(page)) { | ||
224 | struct buffer_head *bh, *head; | ||
225 | int i = 0; | ||
226 | |||
227 | bh = head = page_buffers(page); | ||
228 | do { | ||
229 | printk(KERN_CRIT | ||
230 | " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", | ||
231 | i++, bh, atomic_read(&bh->b_count), | ||
232 | (unsigned long long)bh->b_blocknr, bh->b_state); | ||
233 | bh = bh->b_this_page; | ||
234 | } while (bh != head); | ||
235 | } | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * nilfs_alloc_private_page - allocate a private page with buffer heads | ||
240 | * | ||
241 | * Return Value: On success, a pointer to the allocated page is returned. | ||
242 | * On error, NULL is returned. | ||
243 | */ | ||
244 | struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, | ||
245 | unsigned long state) | ||
246 | { | ||
247 | struct buffer_head *bh, *head, *tail; | ||
248 | struct page *page; | ||
249 | |||
250 | page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ | ||
251 | if (unlikely(!page)) | ||
252 | return NULL; | ||
253 | |||
254 | lock_page(page); | ||
255 | head = alloc_page_buffers(page, size, 0); | ||
256 | if (unlikely(!head)) { | ||
257 | unlock_page(page); | ||
258 | __free_page(page); | ||
259 | return NULL; | ||
260 | } | ||
261 | |||
262 | bh = head; | ||
263 | do { | ||
264 | bh->b_state = (1UL << BH_NILFS_Allocated) | state; | ||
265 | tail = bh; | ||
266 | bh->b_bdev = bdev; | ||
267 | bh = bh->b_this_page; | ||
268 | } while (bh); | ||
269 | |||
270 | tail->b_this_page = head; | ||
271 | attach_page_buffers(page, head); | ||
272 | |||
273 | return page; | ||
274 | } | ||
275 | |||
276 | void nilfs_free_private_page(struct page *page) | ||
277 | { | ||
278 | BUG_ON(!PageLocked(page)); | ||
279 | BUG_ON(page->mapping); | ||
280 | |||
281 | if (page_has_buffers(page) && !try_to_free_buffers(page)) | ||
282 | NILFS_PAGE_BUG(page, "failed to free page"); | ||
283 | |||
284 | unlock_page(page); | ||
285 | __free_page(page); | ||
286 | } | ||
287 | |||
288 | /** | ||
289 | * nilfs_copy_page -- copy the page with buffers | ||
290 | * @dst: destination page | ||
291 | * @src: source page | ||
292 | * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. | ||
293 | * | ||
294 | * This fuction is for both data pages and btnode pages. The dirty flag | ||
295 | * should be treated by caller. The page must not be under i/o. | ||
296 | * Both src and dst page must be locked | ||
297 | */ | ||
298 | static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) | ||
299 | { | ||
300 | struct buffer_head *dbh, *dbufs, *sbh, *sbufs; | ||
301 | unsigned long mask = NILFS_BUFFER_INHERENT_BITS; | ||
302 | |||
303 | BUG_ON(PageWriteback(dst)); | ||
304 | |||
305 | sbh = sbufs = page_buffers(src); | ||
306 | if (!page_has_buffers(dst)) | ||
307 | create_empty_buffers(dst, sbh->b_size, 0); | ||
308 | |||
309 | if (copy_dirty) | ||
310 | mask |= (1UL << BH_Dirty); | ||
311 | |||
312 | dbh = dbufs = page_buffers(dst); | ||
313 | do { | ||
314 | lock_buffer(sbh); | ||
315 | lock_buffer(dbh); | ||
316 | dbh->b_state = sbh->b_state & mask; | ||
317 | dbh->b_blocknr = sbh->b_blocknr; | ||
318 | dbh->b_bdev = sbh->b_bdev; | ||
319 | sbh = sbh->b_this_page; | ||
320 | dbh = dbh->b_this_page; | ||
321 | } while (dbh != dbufs); | ||
322 | |||
323 | copy_highpage(dst, src); | ||
324 | |||
325 | if (PageUptodate(src) && !PageUptodate(dst)) | ||
326 | SetPageUptodate(dst); | ||
327 | else if (!PageUptodate(src) && PageUptodate(dst)) | ||
328 | ClearPageUptodate(dst); | ||
329 | if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) | ||
330 | SetPageMappedToDisk(dst); | ||
331 | else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) | ||
332 | ClearPageMappedToDisk(dst); | ||
333 | |||
334 | do { | ||
335 | unlock_buffer(sbh); | ||
336 | unlock_buffer(dbh); | ||
337 | sbh = sbh->b_this_page; | ||
338 | dbh = dbh->b_this_page; | ||
339 | } while (dbh != dbufs); | ||
340 | } | ||
341 | |||
342 | int nilfs_copy_dirty_pages(struct address_space *dmap, | ||
343 | struct address_space *smap) | ||
344 | { | ||
345 | struct pagevec pvec; | ||
346 | unsigned int i; | ||
347 | pgoff_t index = 0; | ||
348 | int err = 0; | ||
349 | |||
350 | pagevec_init(&pvec, 0); | ||
351 | repeat: | ||
352 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, | ||
353 | PAGEVEC_SIZE)) | ||
354 | return 0; | ||
355 | |||
356 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
357 | struct page *page = pvec.pages[i], *dpage; | ||
358 | |||
359 | lock_page(page); | ||
360 | if (unlikely(!PageDirty(page))) | ||
361 | NILFS_PAGE_BUG(page, "inconsistent dirty state"); | ||
362 | |||
363 | dpage = grab_cache_page(dmap, page->index); | ||
364 | if (unlikely(!dpage)) { | ||
365 | /* No empty page is added to the page cache */ | ||
366 | err = -ENOMEM; | ||
367 | unlock_page(page); | ||
368 | break; | ||
369 | } | ||
370 | if (unlikely(!page_has_buffers(page))) | ||
371 | NILFS_PAGE_BUG(page, | ||
372 | "found empty page in dat page cache"); | ||
373 | |||
374 | nilfs_copy_page(dpage, page, 1); | ||
375 | __set_page_dirty_nobuffers(dpage); | ||
376 | |||
377 | unlock_page(dpage); | ||
378 | page_cache_release(dpage); | ||
379 | unlock_page(page); | ||
380 | } | ||
381 | pagevec_release(&pvec); | ||
382 | cond_resched(); | ||
383 | |||
384 | if (likely(!err)) | ||
385 | goto repeat; | ||
386 | return err; | ||
387 | } | ||
388 | |||
389 | /** | ||
390 | * nilfs_copy_back_pages -- copy back pages to orignal cache from shadow cache | ||
391 | * @dmap: destination page cache | ||
392 | * @smap: source page cache | ||
393 | * | ||
394 | * No pages must no be added to the cache during this process. | ||
395 | * This must be ensured by the caller. | ||
396 | */ | ||
397 | void nilfs_copy_back_pages(struct address_space *dmap, | ||
398 | struct address_space *smap) | ||
399 | { | ||
400 | struct pagevec pvec; | ||
401 | unsigned int i, n; | ||
402 | pgoff_t index = 0; | ||
403 | int err; | ||
404 | |||
405 | pagevec_init(&pvec, 0); | ||
406 | repeat: | ||
407 | n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); | ||
408 | if (!n) | ||
409 | return; | ||
410 | index = pvec.pages[n - 1]->index + 1; | ||
411 | |||
412 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
413 | struct page *page = pvec.pages[i], *dpage; | ||
414 | pgoff_t offset = page->index; | ||
415 | |||
416 | lock_page(page); | ||
417 | dpage = find_lock_page(dmap, offset); | ||
418 | if (dpage) { | ||
419 | /* override existing page on the destination cache */ | ||
420 | WARN_ON(PageDirty(dpage)); | ||
421 | nilfs_copy_page(dpage, page, 0); | ||
422 | unlock_page(dpage); | ||
423 | page_cache_release(dpage); | ||
424 | } else { | ||
425 | struct page *page2; | ||
426 | |||
427 | /* move the page to the destination cache */ | ||
428 | spin_lock_irq(&smap->tree_lock); | ||
429 | page2 = radix_tree_delete(&smap->page_tree, offset); | ||
430 | WARN_ON(page2 != page); | ||
431 | |||
432 | smap->nrpages--; | ||
433 | spin_unlock_irq(&smap->tree_lock); | ||
434 | |||
435 | spin_lock_irq(&dmap->tree_lock); | ||
436 | err = radix_tree_insert(&dmap->page_tree, offset, page); | ||
437 | if (unlikely(err < 0)) { | ||
438 | WARN_ON(err == -EEXIST); | ||
439 | page->mapping = NULL; | ||
440 | page_cache_release(page); /* for cache */ | ||
441 | } else { | ||
442 | page->mapping = dmap; | ||
443 | dmap->nrpages++; | ||
444 | if (PageDirty(page)) | ||
445 | radix_tree_tag_set(&dmap->page_tree, | ||
446 | offset, | ||
447 | PAGECACHE_TAG_DIRTY); | ||
448 | } | ||
449 | spin_unlock_irq(&dmap->tree_lock); | ||
450 | } | ||
451 | unlock_page(page); | ||
452 | } | ||
453 | pagevec_release(&pvec); | ||
454 | cond_resched(); | ||
455 | |||
456 | goto repeat; | ||
457 | } | ||
458 | |||
459 | void nilfs_clear_dirty_pages(struct address_space *mapping) | ||
460 | { | ||
461 | struct pagevec pvec; | ||
462 | unsigned int i; | ||
463 | pgoff_t index = 0; | ||
464 | |||
465 | pagevec_init(&pvec, 0); | ||
466 | |||
467 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | ||
468 | PAGEVEC_SIZE)) { | ||
469 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
470 | struct page *page = pvec.pages[i]; | ||
471 | struct buffer_head *bh, *head; | ||
472 | |||
473 | lock_page(page); | ||
474 | ClearPageUptodate(page); | ||
475 | ClearPageMappedToDisk(page); | ||
476 | bh = head = page_buffers(page); | ||
477 | do { | ||
478 | lock_buffer(bh); | ||
479 | clear_buffer_dirty(bh); | ||
480 | clear_buffer_nilfs_volatile(bh); | ||
481 | clear_buffer_uptodate(bh); | ||
482 | clear_buffer_mapped(bh); | ||
483 | unlock_buffer(bh); | ||
484 | bh = bh->b_this_page; | ||
485 | } while (bh != head); | ||
486 | |||
487 | __nilfs_clear_page_dirty(page); | ||
488 | unlock_page(page); | ||
489 | } | ||
490 | pagevec_release(&pvec); | ||
491 | cond_resched(); | ||
492 | } | ||
493 | } | ||
494 | |||
495 | unsigned nilfs_page_count_clean_buffers(struct page *page, | ||
496 | unsigned from, unsigned to) | ||
497 | { | ||
498 | unsigned block_start, block_end; | ||
499 | struct buffer_head *bh, *head; | ||
500 | unsigned nc = 0; | ||
501 | |||
502 | for (bh = head = page_buffers(page), block_start = 0; | ||
503 | bh != head || !block_start; | ||
504 | block_start = block_end, bh = bh->b_this_page) { | ||
505 | block_end = block_start + bh->b_size; | ||
506 | if (block_end > from && block_start < to && !buffer_dirty(bh)) | ||
507 | nc++; | ||
508 | } | ||
509 | return nc; | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * NILFS2 needs clear_page_dirty() in the following two cases: | ||
514 | * | ||
515 | * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears | ||
516 | * page dirty flags when it copies back pages from the shadow cache | ||
517 | * (gcdat->{i_mapping,i_btnode_cache}) to its original cache | ||
518 | * (dat->{i_mapping,i_btnode_cache}). | ||
519 | * | ||
520 | * 2) Some B-tree operations like insertion or deletion may dispose buffers | ||
521 | * in dirty state, and this needs to cancel the dirty state of their pages. | ||
522 | */ | ||
523 | int __nilfs_clear_page_dirty(struct page *page) | ||
524 | { | ||
525 | struct address_space *mapping = page->mapping; | ||
526 | |||
527 | if (mapping) { | ||
528 | spin_lock_irq(&mapping->tree_lock); | ||
529 | if (test_bit(PG_dirty, &page->flags)) { | ||
530 | radix_tree_tag_clear(&mapping->page_tree, | ||
531 | page_index(page), | ||
532 | PAGECACHE_TAG_DIRTY); | ||
533 | spin_unlock_irq(&mapping->tree_lock); | ||
534 | return clear_page_dirty_for_io(page); | ||
535 | } | ||
536 | spin_unlock_irq(&mapping->tree_lock); | ||
537 | return 0; | ||
538 | } | ||
539 | return TestClearPageDirty(page); | ||
540 | } | ||