diff options
author | Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> | 2009-04-06 22:01:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-07 11:31:13 -0400 |
commit | a60be987d45dd510aeb54389526f9957cfab106c (patch) | |
tree | ffe680785f1f7d4e4096cd211e37bcfeaccafc63 /fs/nilfs2/btnode.c | |
parent | 36a580eb489f54d81a0534974962e732a314b999 (diff) |
nilfs2: B-tree node cache
This adds routines for B-tree node buffers.
Signed-off-by: Seiji Kihara <kihara.seiji@lab.ntt.co.jp>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/nilfs2/btnode.c')
-rw-r--r-- | fs/nilfs2/btnode.c | 316 |
1 files changed, 316 insertions, 0 deletions
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c new file mode 100644 index 000000000000..4cc07b2c30e0 --- /dev/null +++ b/fs/nilfs2/btnode.c | |||
@@ -0,0 +1,316 @@ | |||
1 | /* | ||
2 | * btnode.c - NILFS B-tree node cache | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * This file was originally written by Seiji Kihara <kihara@osrg.net> | ||
21 | * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for | ||
22 | * stabilization and simplification. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/backing-dev.h> | ||
30 | #include "nilfs.h" | ||
31 | #include "mdt.h" | ||
32 | #include "dat.h" | ||
33 | #include "page.h" | ||
34 | #include "btnode.h" | ||
35 | |||
36 | |||
37 | void nilfs_btnode_cache_init_once(struct address_space *btnc) | ||
38 | { | ||
39 | INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); | ||
40 | spin_lock_init(&btnc->tree_lock); | ||
41 | INIT_LIST_HEAD(&btnc->private_list); | ||
42 | spin_lock_init(&btnc->private_lock); | ||
43 | |||
44 | spin_lock_init(&btnc->i_mmap_lock); | ||
45 | INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); | ||
46 | INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); | ||
47 | } | ||
48 | |||
49 | static struct address_space_operations def_btnode_aops; | ||
50 | |||
51 | void nilfs_btnode_cache_init(struct address_space *btnc) | ||
52 | { | ||
53 | btnc->host = NULL; /* can safely set to host inode ? */ | ||
54 | btnc->flags = 0; | ||
55 | mapping_set_gfp_mask(btnc, GFP_NOFS); | ||
56 | btnc->assoc_mapping = NULL; | ||
57 | btnc->backing_dev_info = &default_backing_dev_info; | ||
58 | btnc->a_ops = &def_btnode_aops; | ||
59 | } | ||
60 | |||
/**
 * nilfs_btnode_cache_clear - drop every page of a B-tree node cache
 * @btnc: address space that serves as the B-tree node cache
 *
 * First asks for invalidation of the whole index range (0 through -1),
 * then truncates the mapping from offset 0.
 */
void nilfs_btnode_cache_clear(struct address_space *btnc)
{
	/* pass 1: invalidate over the full index range */
	invalidate_mapping_pages(btnc, 0, -1);

	/* pass 2: truncate starting at offset 0 */
	truncate_inode_pages(btnc, 0);
}
66 | |||
67 | int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, | ||
68 | sector_t pblocknr, struct buffer_head **pbh, | ||
69 | int newblk) | ||
70 | { | ||
71 | struct buffer_head *bh; | ||
72 | struct inode *inode = NILFS_BTNC_I(btnc); | ||
73 | int err; | ||
74 | |||
75 | bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); | ||
76 | if (unlikely(!bh)) | ||
77 | return -ENOMEM; | ||
78 | |||
79 | err = -EEXIST; /* internal code */ | ||
80 | if (newblk) { | ||
81 | if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || | ||
82 | buffer_dirty(bh))) { | ||
83 | brelse(bh); | ||
84 | BUG(); | ||
85 | } | ||
86 | bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; | ||
87 | bh->b_blocknr = blocknr; | ||
88 | set_buffer_mapped(bh); | ||
89 | set_buffer_uptodate(bh); | ||
90 | goto found; | ||
91 | } | ||
92 | |||
93 | if (buffer_uptodate(bh) || buffer_dirty(bh)) | ||
94 | goto found; | ||
95 | |||
96 | if (pblocknr == 0) { | ||
97 | pblocknr = blocknr; | ||
98 | if (inode->i_ino != NILFS_DAT_INO) { | ||
99 | struct inode *dat = | ||
100 | nilfs_dat_inode(NILFS_I_NILFS(inode)); | ||
101 | |||
102 | /* blocknr is a virtual block number */ | ||
103 | err = nilfs_dat_translate(dat, blocknr, &pblocknr); | ||
104 | if (unlikely(err)) { | ||
105 | brelse(bh); | ||
106 | goto out_locked; | ||
107 | } | ||
108 | } | ||
109 | } | ||
110 | lock_buffer(bh); | ||
111 | if (buffer_uptodate(bh)) { | ||
112 | unlock_buffer(bh); | ||
113 | err = -EEXIST; /* internal code */ | ||
114 | goto found; | ||
115 | } | ||
116 | set_buffer_mapped(bh); | ||
117 | bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; | ||
118 | bh->b_blocknr = pblocknr; /* set block address for read */ | ||
119 | bh->b_end_io = end_buffer_read_sync; | ||
120 | get_bh(bh); | ||
121 | submit_bh(READ, bh); | ||
122 | bh->b_blocknr = blocknr; /* set back to the given block address */ | ||
123 | err = 0; | ||
124 | found: | ||
125 | *pbh = bh; | ||
126 | |||
127 | out_locked: | ||
128 | unlock_page(bh->b_page); | ||
129 | page_cache_release(bh->b_page); | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, | ||
134 | sector_t pblocknr, struct buffer_head **pbh, int newblk) | ||
135 | { | ||
136 | struct buffer_head *bh; | ||
137 | int err; | ||
138 | |||
139 | err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk); | ||
140 | if (err == -EEXIST) /* internal code (cache hit) */ | ||
141 | return 0; | ||
142 | if (unlikely(err)) | ||
143 | return err; | ||
144 | |||
145 | bh = *pbh; | ||
146 | wait_on_buffer(bh); | ||
147 | if (!buffer_uptodate(bh)) { | ||
148 | brelse(bh); | ||
149 | return -EIO; | ||
150 | } | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * nilfs_btnode_delete - delete B-tree node buffer | ||
156 | * @bh: buffer to be deleted | ||
157 | * | ||
158 | * nilfs_btnode_delete() invalidates the specified buffer and delete the page | ||
159 | * including the buffer if the page gets unbusy. | ||
160 | */ | ||
161 | void nilfs_btnode_delete(struct buffer_head *bh) | ||
162 | { | ||
163 | struct address_space *mapping; | ||
164 | struct page *page = bh->b_page; | ||
165 | pgoff_t index = page_index(page); | ||
166 | int still_dirty; | ||
167 | |||
168 | page_cache_get(page); | ||
169 | lock_page(page); | ||
170 | wait_on_page_writeback(page); | ||
171 | |||
172 | nilfs_forget_buffer(bh); | ||
173 | still_dirty = PageDirty(page); | ||
174 | mapping = page->mapping; | ||
175 | unlock_page(page); | ||
176 | page_cache_release(page); | ||
177 | |||
178 | if (!still_dirty && mapping) | ||
179 | invalidate_inode_pages2_range(mapping, index, index); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * nilfs_btnode_prepare_change_key | ||
184 | * prepare to move contents of the block for old key to one of new key. | ||
185 | * the old buffer will not be removed, but might be reused for new buffer. | ||
186 | * it might return -ENOMEM because of memory allocation errors, | ||
187 | * and might return -EIO because of disk read errors. | ||
188 | */ | ||
189 | int nilfs_btnode_prepare_change_key(struct address_space *btnc, | ||
190 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
191 | { | ||
192 | struct buffer_head *obh, *nbh; | ||
193 | struct inode *inode = NILFS_BTNC_I(btnc); | ||
194 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
195 | int err; | ||
196 | |||
197 | if (oldkey == newkey) | ||
198 | return 0; | ||
199 | |||
200 | obh = ctxt->bh; | ||
201 | ctxt->newbh = NULL; | ||
202 | |||
203 | if (inode->i_blkbits == PAGE_CACHE_SHIFT) { | ||
204 | lock_page(obh->b_page); | ||
205 | /* | ||
206 | * We cannot call radix_tree_preload for the kernels older | ||
207 | * than 2.6.23, because it is not exported for modules. | ||
208 | */ | ||
209 | err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
210 | if (err) | ||
211 | goto failed_unlock; | ||
212 | /* BUG_ON(oldkey != obh->b_page->index); */ | ||
213 | if (unlikely(oldkey != obh->b_page->index)) | ||
214 | NILFS_PAGE_BUG(obh->b_page, | ||
215 | "invalid oldkey %lld (newkey=%lld)", | ||
216 | (unsigned long long)oldkey, | ||
217 | (unsigned long long)newkey); | ||
218 | |||
219 | retry: | ||
220 | spin_lock_irq(&btnc->tree_lock); | ||
221 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); | ||
222 | spin_unlock_irq(&btnc->tree_lock); | ||
223 | /* | ||
224 | * Note: page->index will not change to newkey until | ||
225 | * nilfs_btnode_commit_change_key() will be called. | ||
226 | * To protect the page in intermediate state, the page lock | ||
227 | * is held. | ||
228 | */ | ||
229 | radix_tree_preload_end(); | ||
230 | if (!err) | ||
231 | return 0; | ||
232 | else if (err != -EEXIST) | ||
233 | goto failed_unlock; | ||
234 | |||
235 | err = invalidate_inode_pages2_range(btnc, newkey, newkey); | ||
236 | if (!err) | ||
237 | goto retry; | ||
238 | /* fallback to copy mode */ | ||
239 | unlock_page(obh->b_page); | ||
240 | } | ||
241 | |||
242 | err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); | ||
243 | if (likely(!err)) { | ||
244 | BUG_ON(nbh == obh); | ||
245 | ctxt->newbh = nbh; | ||
246 | } | ||
247 | return err; | ||
248 | |||
249 | failed_unlock: | ||
250 | unlock_page(obh->b_page); | ||
251 | return err; | ||
252 | } | ||
253 | |||
254 | /** | ||
255 | * nilfs_btnode_commit_change_key | ||
256 | * commit the change_key operation prepared by prepare_change_key(). | ||
257 | */ | ||
258 | void nilfs_btnode_commit_change_key(struct address_space *btnc, | ||
259 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
260 | { | ||
261 | struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; | ||
262 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
263 | struct page *opage; | ||
264 | |||
265 | if (oldkey == newkey) | ||
266 | return; | ||
267 | |||
268 | if (nbh == NULL) { /* blocksize == pagesize */ | ||
269 | opage = obh->b_page; | ||
270 | if (unlikely(oldkey != opage->index)) | ||
271 | NILFS_PAGE_BUG(opage, | ||
272 | "invalid oldkey %lld (newkey=%lld)", | ||
273 | (unsigned long long)oldkey, | ||
274 | (unsigned long long)newkey); | ||
275 | if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage)) | ||
276 | BUG(); | ||
277 | |||
278 | spin_lock_irq(&btnc->tree_lock); | ||
279 | radix_tree_delete(&btnc->page_tree, oldkey); | ||
280 | radix_tree_tag_set(&btnc->page_tree, newkey, | ||
281 | PAGECACHE_TAG_DIRTY); | ||
282 | spin_unlock_irq(&btnc->tree_lock); | ||
283 | |||
284 | opage->index = obh->b_blocknr = newkey; | ||
285 | unlock_page(opage); | ||
286 | } else { | ||
287 | nilfs_copy_buffer(nbh, obh); | ||
288 | nilfs_btnode_mark_dirty(nbh); | ||
289 | |||
290 | nbh->b_blocknr = newkey; | ||
291 | ctxt->bh = nbh; | ||
292 | nilfs_btnode_delete(obh); /* will decrement bh->b_count */ | ||
293 | } | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * nilfs_btnode_abort_change_key | ||
298 | * abort the change_key operation prepared by prepare_change_key(). | ||
299 | */ | ||
300 | void nilfs_btnode_abort_change_key(struct address_space *btnc, | ||
301 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
302 | { | ||
303 | struct buffer_head *nbh = ctxt->newbh; | ||
304 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
305 | |||
306 | if (oldkey == newkey) | ||
307 | return; | ||
308 | |||
309 | if (nbh == NULL) { /* blocksize == pagesize */ | ||
310 | spin_lock_irq(&btnc->tree_lock); | ||
311 | radix_tree_delete(&btnc->page_tree, newkey); | ||
312 | spin_unlock_irq(&btnc->tree_lock); | ||
313 | unlock_page(ctxt->bh->b_page); | ||
314 | } else | ||
315 | brelse(nbh); | ||
316 | } | ||