aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nilfs2/page.c
diff options
context:
space:
mode:
authorRyusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>2009-04-06 22:01:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-07 11:31:13 -0400
commit0bd49f9446130a6a3914eb07b54db489e3222b34 (patch)
tree5122a227c9cb1cfe92a74c53baf1bb7ca7053435 /fs/nilfs2/page.c
parenta60be987d45dd510aeb54389526f9957cfab106c (diff)
nilfs2: buffer and page operations
This adds common routines for buffer/page operations used in B-tree node caches, meta data files, or the segment constructor (log writer).

NILFS uses copy functions for buffers and pages for the following reasons:

1) Relocation required for COW: Since NILFS changes the address of on-disk blocks, moving buffers in the page cache is needed for the buffers which are not addressed by a file offset. If the buffer size is smaller than the page size, this involves partial copy of pages.

2) Freezing mmapped pages: NILFS calculates checksums for each log to ensure its validity. If page data changes after the checksum calculation, this validity check will not work correctly. To avoid this failure for mmapped pages, NILFS freezes their data by copying.

3) Copy-on-write for DAT pages: NILFS makes clones of DAT page caches in a copy-on-write manner during GC processes, and this ensures atomicity and consistency of the DAT in the transient state.

In addition, NILFS uses two obsolete functions, nilfs_mark_buffer_dirty() and nilfs_clear_page_dirty() respectively.

* nilfs_mark_buffer_dirty() was required to avoid NULL pointer dereference faults: Since the page cache of B-tree node pages or data page cache of pseudo inodes does not have a valid mapping->host, calling mark_buffer_dirty() for their buffers causes the fault; it calls __mark_inode_dirty(NULL) through __set_page_dirty().

* nilfs_clear_page_dirty() was needed in the two cases: 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears page dirty flags when it copies back pages from the cloned cache (gcdat->{i_mapping,i_btnode_cache}) to its original cache (dat->{i_mapping,i_btnode_cache}). 2) Some B-tree operations like insertion or deletion may dispose buffers in dirty state, and this needs to cancel the dirty state of their pages. clear_page_dirty_for_io() caused faults because it does not clear the dirty tag on the page cache.
Signed-off-by: Seiji Kihara <kihara.seiji@lab.ntt.co.jp> Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/nilfs2/page.c')
-rw-r--r--fs/nilfs2/page.c542
1 files changed, 542 insertions, 0 deletions
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
new file mode 100644
index 000000000000..7b18be8cd47a
--- /dev/null
+++ b/fs/nilfs2/page.c
@@ -0,0 +1,542 @@
1/*
2 * page.c - buffer/page management specific to NILFS
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
21 * Seiji Kihara <kihara@osrg.net>.
22 */
23
24#include <linux/pagemap.h>
25#include <linux/writeback.h>
26#include <linux/swap.h>
27#include <linux/bitops.h>
28#include <linux/page-flags.h>
29#include <linux/list.h>
30#include <linux/highmem.h>
31#include <linux/pagevec.h>
32#include "nilfs.h"
33#include "page.h"
34#include "mdt.h"
35
36
/*
 * Buffer state bits that a destination buffer head takes over from its
 * source when buffers or whole pages are copied in this file.
 */
#define NILFS_BUFFER_INHERENT_BITS		\
	((1UL << BH_Uptodate) |			\
	 (1UL << BH_Mapped) |			\
	 (1UL << BH_NILFS_Node) |		\
	 (1UL << BH_NILFS_Volatile) |		\
	 (1UL << BH_NILFS_Allocated))
40
41static struct buffer_head *
42__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
43 int blkbits, unsigned long b_state)
44
45{
46 unsigned long first_block;
47 struct buffer_head *bh;
48
49 if (!page_has_buffers(page))
50 create_empty_buffers(page, 1 << blkbits, b_state);
51
52 first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
53 bh = nilfs_page_get_nth_block(page, block - first_block);
54
55 touch_buffer(bh);
56 wait_on_buffer(bh);
57 return bh;
58}
59
60/*
61 * Since the page cache of B-tree node pages or data page cache of pseudo
62 * inodes does not have a valid mapping->host pointer, calling
63 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
64 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
65 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
66 */
67void nilfs_mark_buffer_dirty(struct buffer_head *bh)
68{
69 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
70 __set_page_dirty_nobuffers(bh->b_page);
71}
72
73struct buffer_head *nilfs_grab_buffer(struct inode *inode,
74 struct address_space *mapping,
75 unsigned long blkoff,
76 unsigned long b_state)
77{
78 int blkbits = inode->i_blkbits;
79 pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
80 struct page *page, *opage;
81 struct buffer_head *bh, *obh;
82
83 page = grab_cache_page(mapping, index);
84 if (unlikely(!page))
85 return NULL;
86
87 bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
88 if (unlikely(!bh)) {
89 unlock_page(page);
90 page_cache_release(page);
91 return NULL;
92 }
93 if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
94 /*
95 * Shadow page cache uses assoc_mapping to point its original
96 * page cache. The following code tries the original cache
97 * if the given cache is a shadow and it didn't hit.
98 */
99 opage = find_lock_page(mapping->assoc_mapping, index);
100 if (!opage)
101 return bh;
102
103 obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
104 b_state);
105 if (buffer_uptodate(obh)) {
106 nilfs_copy_buffer(bh, obh);
107 if (buffer_dirty(obh)) {
108 nilfs_mark_buffer_dirty(bh);
109 if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
110 nilfs_mdt_mark_dirty(inode);
111 }
112 }
113 brelse(obh);
114 unlock_page(opage);
115 page_cache_release(opage);
116 }
117 return bh;
118}
119
120/**
121 * nilfs_forget_buffer - discard dirty state
122 * @inode: owner inode of the buffer
123 * @bh: buffer head of the buffer to be discarded
124 */
125void nilfs_forget_buffer(struct buffer_head *bh)
126{
127 struct page *page = bh->b_page;
128
129 lock_buffer(bh);
130 clear_buffer_nilfs_volatile(bh);
131 if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page))
132 __nilfs_clear_page_dirty(page);
133
134 clear_buffer_uptodate(bh);
135 clear_buffer_mapped(bh);
136 bh->b_blocknr = -1;
137 ClearPageUptodate(page);
138 ClearPageMappedToDisk(page);
139 unlock_buffer(bh);
140 brelse(bh);
141}
142
143/**
144 * nilfs_copy_buffer -- copy buffer data and flags
145 * @dbh: destination buffer
146 * @sbh: source buffer
147 */
148void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
149{
150 void *kaddr0, *kaddr1;
151 unsigned long bits;
152 struct page *spage = sbh->b_page, *dpage = dbh->b_page;
153 struct buffer_head *bh;
154
155 kaddr0 = kmap_atomic(spage, KM_USER0);
156 kaddr1 = kmap_atomic(dpage, KM_USER1);
157 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
158 kunmap_atomic(kaddr1, KM_USER1);
159 kunmap_atomic(kaddr0, KM_USER0);
160
161 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
162 dbh->b_blocknr = sbh->b_blocknr;
163 dbh->b_bdev = sbh->b_bdev;
164
165 bh = dbh;
166 bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
167 while ((bh = bh->b_this_page) != dbh) {
168 lock_buffer(bh);
169 bits &= bh->b_state;
170 unlock_buffer(bh);
171 }
172 if (bits & (1UL << BH_Uptodate))
173 SetPageUptodate(dpage);
174 else
175 ClearPageUptodate(dpage);
176 if (bits & (1UL << BH_Mapped))
177 SetPageMappedToDisk(dpage);
178 else
179 ClearPageMappedToDisk(dpage);
180}
181
182/**
183 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
184 * @page: page to be checked
185 *
186 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
187 * Otherwise, it returns non-zero value.
188 */
189int nilfs_page_buffers_clean(struct page *page)
190{
191 struct buffer_head *bh, *head;
192
193 bh = head = page_buffers(page);
194 do {
195 if (buffer_dirty(bh))
196 return 0;
197 bh = bh->b_this_page;
198 } while (bh != head);
199 return 1;
200}
201
202void nilfs_page_bug(struct page *page)
203{
204 struct address_space *m;
205 unsigned long ino = 0;
206
207 if (unlikely(!page)) {
208 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
209 return;
210 }
211
212 m = page->mapping;
213 if (m) {
214 struct inode *inode = NILFS_AS_I(m);
215 if (inode != NULL)
216 ino = inode->i_ino;
217 }
218 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
219 "mapping=%p ino=%lu\n",
220 page, atomic_read(&page->_count),
221 (unsigned long long)page->index, page->flags, m, ino);
222
223 if (page_has_buffers(page)) {
224 struct buffer_head *bh, *head;
225 int i = 0;
226
227 bh = head = page_buffers(page);
228 do {
229 printk(KERN_CRIT
230 " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
231 i++, bh, atomic_read(&bh->b_count),
232 (unsigned long long)bh->b_blocknr, bh->b_state);
233 bh = bh->b_this_page;
234 } while (bh != head);
235 }
236}
237
238/**
239 * nilfs_alloc_private_page - allocate a private page with buffer heads
240 *
241 * Return Value: On success, a pointer to the allocated page is returned.
242 * On error, NULL is returned.
243 */
244struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
245 unsigned long state)
246{
247 struct buffer_head *bh, *head, *tail;
248 struct page *page;
249
250 page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
251 if (unlikely(!page))
252 return NULL;
253
254 lock_page(page);
255 head = alloc_page_buffers(page, size, 0);
256 if (unlikely(!head)) {
257 unlock_page(page);
258 __free_page(page);
259 return NULL;
260 }
261
262 bh = head;
263 do {
264 bh->b_state = (1UL << BH_NILFS_Allocated) | state;
265 tail = bh;
266 bh->b_bdev = bdev;
267 bh = bh->b_this_page;
268 } while (bh);
269
270 tail->b_this_page = head;
271 attach_page_buffers(page, head);
272
273 return page;
274}
275
276void nilfs_free_private_page(struct page *page)
277{
278 BUG_ON(!PageLocked(page));
279 BUG_ON(page->mapping);
280
281 if (page_has_buffers(page) && !try_to_free_buffers(page))
282 NILFS_PAGE_BUG(page, "failed to free page");
283
284 unlock_page(page);
285 __free_page(page);
286}
287
288/**
289 * nilfs_copy_page -- copy the page with buffers
290 * @dst: destination page
291 * @src: source page
292 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
293 *
294 * This fuction is for both data pages and btnode pages. The dirty flag
295 * should be treated by caller. The page must not be under i/o.
296 * Both src and dst page must be locked
297 */
298static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
299{
300 struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
301 unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
302
303 BUG_ON(PageWriteback(dst));
304
305 sbh = sbufs = page_buffers(src);
306 if (!page_has_buffers(dst))
307 create_empty_buffers(dst, sbh->b_size, 0);
308
309 if (copy_dirty)
310 mask |= (1UL << BH_Dirty);
311
312 dbh = dbufs = page_buffers(dst);
313 do {
314 lock_buffer(sbh);
315 lock_buffer(dbh);
316 dbh->b_state = sbh->b_state & mask;
317 dbh->b_blocknr = sbh->b_blocknr;
318 dbh->b_bdev = sbh->b_bdev;
319 sbh = sbh->b_this_page;
320 dbh = dbh->b_this_page;
321 } while (dbh != dbufs);
322
323 copy_highpage(dst, src);
324
325 if (PageUptodate(src) && !PageUptodate(dst))
326 SetPageUptodate(dst);
327 else if (!PageUptodate(src) && PageUptodate(dst))
328 ClearPageUptodate(dst);
329 if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
330 SetPageMappedToDisk(dst);
331 else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
332 ClearPageMappedToDisk(dst);
333
334 do {
335 unlock_buffer(sbh);
336 unlock_buffer(dbh);
337 sbh = sbh->b_this_page;
338 dbh = dbh->b_this_page;
339 } while (dbh != dbufs);
340}
341
342int nilfs_copy_dirty_pages(struct address_space *dmap,
343 struct address_space *smap)
344{
345 struct pagevec pvec;
346 unsigned int i;
347 pgoff_t index = 0;
348 int err = 0;
349
350 pagevec_init(&pvec, 0);
351repeat:
352 if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
353 PAGEVEC_SIZE))
354 return 0;
355
356 for (i = 0; i < pagevec_count(&pvec); i++) {
357 struct page *page = pvec.pages[i], *dpage;
358
359 lock_page(page);
360 if (unlikely(!PageDirty(page)))
361 NILFS_PAGE_BUG(page, "inconsistent dirty state");
362
363 dpage = grab_cache_page(dmap, page->index);
364 if (unlikely(!dpage)) {
365 /* No empty page is added to the page cache */
366 err = -ENOMEM;
367 unlock_page(page);
368 break;
369 }
370 if (unlikely(!page_has_buffers(page)))
371 NILFS_PAGE_BUG(page,
372 "found empty page in dat page cache");
373
374 nilfs_copy_page(dpage, page, 1);
375 __set_page_dirty_nobuffers(dpage);
376
377 unlock_page(dpage);
378 page_cache_release(dpage);
379 unlock_page(page);
380 }
381 pagevec_release(&pvec);
382 cond_resched();
383
384 if (likely(!err))
385 goto repeat;
386 return err;
387}
388
389/**
390 * nilfs_copy_back_pages -- copy back pages to orignal cache from shadow cache
391 * @dmap: destination page cache
392 * @smap: source page cache
393 *
394 * No pages must no be added to the cache during this process.
395 * This must be ensured by the caller.
396 */
397void nilfs_copy_back_pages(struct address_space *dmap,
398 struct address_space *smap)
399{
400 struct pagevec pvec;
401 unsigned int i, n;
402 pgoff_t index = 0;
403 int err;
404
405 pagevec_init(&pvec, 0);
406repeat:
407 n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
408 if (!n)
409 return;
410 index = pvec.pages[n - 1]->index + 1;
411
412 for (i = 0; i < pagevec_count(&pvec); i++) {
413 struct page *page = pvec.pages[i], *dpage;
414 pgoff_t offset = page->index;
415
416 lock_page(page);
417 dpage = find_lock_page(dmap, offset);
418 if (dpage) {
419 /* override existing page on the destination cache */
420 BUG_ON(PageDirty(dpage));
421 nilfs_copy_page(dpage, page, 0);
422 unlock_page(dpage);
423 page_cache_release(dpage);
424 } else {
425 struct page *page2;
426
427 /* move the page to the destination cache */
428 spin_lock_irq(&smap->tree_lock);
429 page2 = radix_tree_delete(&smap->page_tree, offset);
430 if (unlikely(page2 != page))
431 NILFS_PAGE_BUG(page, "page removal failed "
432 "(offset=%lu, page2=%p)",
433 offset, page2);
434 smap->nrpages--;
435 spin_unlock_irq(&smap->tree_lock);
436
437 spin_lock_irq(&dmap->tree_lock);
438 err = radix_tree_insert(&dmap->page_tree, offset, page);
439 if (unlikely(err < 0)) {
440 BUG_ON(err == -EEXIST);
441 page->mapping = NULL;
442 page_cache_release(page); /* for cache */
443 } else {
444 page->mapping = dmap;
445 dmap->nrpages++;
446 if (PageDirty(page))
447 radix_tree_tag_set(&dmap->page_tree,
448 offset,
449 PAGECACHE_TAG_DIRTY);
450 }
451 spin_unlock_irq(&dmap->tree_lock);
452 }
453 unlock_page(page);
454 }
455 pagevec_release(&pvec);
456 cond_resched();
457
458 goto repeat;
459}
460
461void nilfs_clear_dirty_pages(struct address_space *mapping)
462{
463 struct pagevec pvec;
464 unsigned int i;
465 pgoff_t index = 0;
466
467 pagevec_init(&pvec, 0);
468
469 while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
470 PAGEVEC_SIZE)) {
471 for (i = 0; i < pagevec_count(&pvec); i++) {
472 struct page *page = pvec.pages[i];
473 struct buffer_head *bh, *head;
474
475 lock_page(page);
476 ClearPageUptodate(page);
477 ClearPageMappedToDisk(page);
478 bh = head = page_buffers(page);
479 do {
480 lock_buffer(bh);
481 clear_buffer_dirty(bh);
482 clear_buffer_nilfs_volatile(bh);
483 clear_buffer_uptodate(bh);
484 clear_buffer_mapped(bh);
485 unlock_buffer(bh);
486 bh = bh->b_this_page;
487 } while (bh != head);
488
489 __nilfs_clear_page_dirty(page);
490 unlock_page(page);
491 }
492 pagevec_release(&pvec);
493 cond_resched();
494 }
495}
496
497unsigned nilfs_page_count_clean_buffers(struct page *page,
498 unsigned from, unsigned to)
499{
500 unsigned block_start, block_end;
501 struct buffer_head *bh, *head;
502 unsigned nc = 0;
503
504 for (bh = head = page_buffers(page), block_start = 0;
505 bh != head || !block_start;
506 block_start = block_end, bh = bh->b_this_page) {
507 block_end = block_start + bh->b_size;
508 if (block_end > from && block_start < to && !buffer_dirty(bh))
509 nc++;
510 }
511 return nc;
512}
513
514/*
515 * NILFS2 needs clear_page_dirty() in the following two cases:
516 *
517 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
518 * page dirty flags when it copies back pages from the shadow cache
519 * (gcdat->{i_mapping,i_btnode_cache}) to its original cache
520 * (dat->{i_mapping,i_btnode_cache}).
521 *
522 * 2) Some B-tree operations like insertion or deletion may dispose buffers
523 * in dirty state, and this needs to cancel the dirty state of their pages.
524 */
525int __nilfs_clear_page_dirty(struct page *page)
526{
527 struct address_space *mapping = page->mapping;
528
529 if (mapping) {
530 spin_lock_irq(&mapping->tree_lock);
531 if (test_bit(PG_dirty, &page->flags)) {
532 radix_tree_tag_clear(&mapping->page_tree,
533 page_index(page),
534 PAGECACHE_TAG_DIRTY);
535 spin_unlock_irq(&mapping->tree_lock);
536 return clear_page_dirty_for_io(page);
537 }
538 spin_unlock_irq(&mapping->tree_lock);
539 return 0;
540 }
541 return TestClearPageDirty(page);
542}