diff options
Diffstat (limited to 'fs/logfs/segment.c')
-rw-r--r-- | fs/logfs/segment.c | 930 |
1 files changed, 930 insertions, 0 deletions
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c new file mode 100644 index 000000000000..f77ce2b470ba --- /dev/null +++ b/fs/logfs/segment.c | |||
@@ -0,0 +1,930 @@ | |||
1 | /* | ||
2 | * fs/logfs/segment.c - Handling the Object Store | ||
3 | * | ||
4 | * As should be obvious for Linux kernel code, license is GPLv2 | ||
5 | * | ||
6 | * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> | ||
7 | * | ||
8 | * Object store or ostore makes up the complete device with exception of | ||
9 | * the superblock and journal areas. Apart from its own metadata it stores | ||
10 | * three kinds of objects: inodes, dentries and blocks, both data and indirect. | ||
11 | */ | ||
12 | #include "logfs.h" | ||
13 | #include <linux/slab.h> | ||
14 | |||
15 | static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) | ||
16 | { | ||
17 | struct logfs_super *super = logfs_super(sb); | ||
18 | struct btree_head32 *head = &super->s_reserved_segments; | ||
19 | int err; | ||
20 | |||
21 | err = btree_insert32(head, segno, (void *)1, GFP_NOFS); | ||
22 | if (err) | ||
23 | return err; | ||
24 | logfs_super(sb)->s_bad_segments++; | ||
25 | /* FIXME: write to journal */ | ||
26 | return 0; | ||
27 | } | ||
28 | |||
29 | int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase) | ||
30 | { | ||
31 | struct logfs_super *super = logfs_super(sb); | ||
32 | |||
33 | super->s_gec++; | ||
34 | |||
35 | return super->s_devops->erase(sb, (u64)segno << super->s_segshift, | ||
36 | super->s_segsize, ensure_erase); | ||
37 | } | ||
38 | |||
39 | static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes) | ||
40 | { | ||
41 | s32 ofs; | ||
42 | |||
43 | logfs_open_area(area, bytes); | ||
44 | |||
45 | ofs = area->a_used_bytes; | ||
46 | area->a_used_bytes += bytes; | ||
47 | BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize); | ||
48 | |||
49 | return dev_ofs(area->a_sb, area->a_segno, ofs); | ||
50 | } | ||
51 | |||
52 | static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, | ||
53 | int use_filler) | ||
54 | { | ||
55 | struct logfs_super *super = logfs_super(sb); | ||
56 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | ||
57 | filler_t *filler = super->s_devops->readpage; | ||
58 | struct page *page; | ||
59 | |||
60 | BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); | ||
61 | if (use_filler) | ||
62 | page = read_cache_page(mapping, index, filler, sb); | ||
63 | else { | ||
64 | page = find_or_create_page(mapping, index, GFP_NOFS); | ||
65 | unlock_page(page); | ||
66 | } | ||
67 | return page; | ||
68 | } | ||
69 | |||
70 | void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, | ||
71 | int use_filler) | ||
72 | { | ||
73 | pgoff_t index = ofs >> PAGE_SHIFT; | ||
74 | struct page *page; | ||
75 | long offset = ofs & (PAGE_SIZE-1); | ||
76 | long copylen; | ||
77 | |||
78 | /* Only logfs_wbuf_recover may use len==0 */ | ||
79 | BUG_ON(!len && !use_filler); | ||
80 | do { | ||
81 | copylen = min((ulong)len, PAGE_SIZE - offset); | ||
82 | |||
83 | page = get_mapping_page(area->a_sb, index, use_filler); | ||
84 | SetPageUptodate(page); | ||
85 | BUG_ON(!page); /* FIXME: reserve a pool */ | ||
86 | memcpy(page_address(page) + offset, buf, copylen); | ||
87 | SetPagePrivate(page); | ||
88 | page_cache_release(page); | ||
89 | |||
90 | buf += copylen; | ||
91 | len -= copylen; | ||
92 | offset = 0; | ||
93 | index++; | ||
94 | } while (len); | ||
95 | } | ||
96 | |||
97 | static void pad_partial_page(struct logfs_area *area) | ||
98 | { | ||
99 | struct super_block *sb = area->a_sb; | ||
100 | struct page *page; | ||
101 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); | ||
102 | pgoff_t index = ofs >> PAGE_SHIFT; | ||
103 | long offset = ofs & (PAGE_SIZE-1); | ||
104 | u32 len = PAGE_SIZE - offset; | ||
105 | |||
106 | if (len % PAGE_SIZE) { | ||
107 | page = get_mapping_page(sb, index, 0); | ||
108 | BUG_ON(!page); /* FIXME: reserve a pool */ | ||
109 | memset(page_address(page) + offset, 0xff, len); | ||
110 | SetPagePrivate(page); | ||
111 | page_cache_release(page); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | static void pad_full_pages(struct logfs_area *area) | ||
116 | { | ||
117 | struct super_block *sb = area->a_sb; | ||
118 | struct logfs_super *super = logfs_super(sb); | ||
119 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); | ||
120 | u32 len = super->s_segsize - area->a_used_bytes; | ||
121 | pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; | ||
122 | pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; | ||
123 | struct page *page; | ||
124 | |||
125 | while (no_indizes) { | ||
126 | page = get_mapping_page(sb, index, 0); | ||
127 | BUG_ON(!page); /* FIXME: reserve a pool */ | ||
128 | SetPageUptodate(page); | ||
129 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
130 | SetPagePrivate(page); | ||
131 | page_cache_release(page); | ||
132 | index++; | ||
133 | no_indizes--; | ||
134 | } | ||
135 | } | ||
136 | |||
/*
 * bdev_writeseg will write full pages.  Fill the tail of the current page
 * with 0xff so no stale data leaks out.  For the final writeout of a
 * segment (@final != 0) also allocate and fill all remaining pages.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
	pad_partial_page(area);
	if (!final)
		return;
	pad_full_pages(area);
}
147 | |||
148 | /* | ||
149 | * We have to be careful with the alias tree. Since lookup is done by bix, | ||
150 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with | ||
151 | * indirect blocks. So always use it through accessor functions. | ||
152 | */ | ||
153 | static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix, | ||
154 | level_t level) | ||
155 | { | ||
156 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; | ||
157 | pgoff_t index = logfs_pack_index(bix, level); | ||
158 | |||
159 | return btree_lookup128(head, ino, index); | ||
160 | } | ||
161 | |||
162 | static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix, | ||
163 | level_t level, void *val) | ||
164 | { | ||
165 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; | ||
166 | pgoff_t index = logfs_pack_index(bix, level); | ||
167 | |||
168 | return btree_insert128(head, ino, index, val, GFP_NOFS); | ||
169 | } | ||
170 | |||
171 | static int btree_write_alias(struct super_block *sb, struct logfs_block *block, | ||
172 | write_alias_t *write_one_alias) | ||
173 | { | ||
174 | struct object_alias_item *item; | ||
175 | int err; | ||
176 | |||
177 | list_for_each_entry(item, &block->item_list, list) { | ||
178 | err = write_alias_journal(sb, block->ino, block->bix, | ||
179 | block->level, item->child_no, item->val); | ||
180 | if (err) | ||
181 | return err; | ||
182 | } | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | static struct logfs_block_ops btree_block_ops = { | ||
187 | .write_block = btree_write_block, | ||
188 | .free_block = __free_block, | ||
189 | .write_alias = btree_write_alias, | ||
190 | }; | ||
191 | |||
192 | int logfs_load_object_aliases(struct super_block *sb, | ||
193 | struct logfs_obj_alias *oa, int count) | ||
194 | { | ||
195 | struct logfs_super *super = logfs_super(sb); | ||
196 | struct logfs_block *block; | ||
197 | struct object_alias_item *item; | ||
198 | u64 ino, bix; | ||
199 | level_t level; | ||
200 | int i, err; | ||
201 | |||
202 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; | ||
203 | count /= sizeof(*oa); | ||
204 | for (i = 0; i < count; i++) { | ||
205 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); | ||
206 | if (!item) | ||
207 | return -ENOMEM; | ||
208 | memset(item, 0, sizeof(*item)); | ||
209 | |||
210 | super->s_no_object_aliases++; | ||
211 | item->val = oa[i].val; | ||
212 | item->child_no = be16_to_cpu(oa[i].child_no); | ||
213 | |||
214 | ino = be64_to_cpu(oa[i].ino); | ||
215 | bix = be64_to_cpu(oa[i].bix); | ||
216 | level = LEVEL(oa[i].level); | ||
217 | |||
218 | log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n", | ||
219 | ino, bix, level, item->child_no, | ||
220 | be64_to_cpu(item->val)); | ||
221 | block = alias_tree_lookup(sb, ino, bix, level); | ||
222 | if (!block) { | ||
223 | block = __alloc_block(sb, ino, bix, level); | ||
224 | block->ops = &btree_block_ops; | ||
225 | err = alias_tree_insert(sb, ino, bix, level, block); | ||
226 | BUG_ON(err); /* mempool empty */ | ||
227 | } | ||
228 | if (test_and_set_bit(item->child_no, block->alias_map)) { | ||
229 | printk(KERN_ERR"LogFS: Alias collision detected\n"); | ||
230 | return -EIO; | ||
231 | } | ||
232 | list_move_tail(&block->alias_list, &super->s_object_alias); | ||
233 | list_add(&item->list, &block->item_list); | ||
234 | } | ||
235 | return 0; | ||
236 | } | ||
237 | |||
238 | static void kill_alias(void *_block, unsigned long ignore0, | ||
239 | u64 ignore1, u64 ignore2, size_t ignore3) | ||
240 | { | ||
241 | struct logfs_block *block = _block; | ||
242 | struct super_block *sb = block->sb; | ||
243 | struct logfs_super *super = logfs_super(sb); | ||
244 | struct object_alias_item *item; | ||
245 | |||
246 | while (!list_empty(&block->item_list)) { | ||
247 | item = list_entry(block->item_list.next, typeof(*item), list); | ||
248 | list_del(&item->list); | ||
249 | mempool_free(item, super->s_alias_pool); | ||
250 | } | ||
251 | block->ops->free_block(sb, block); | ||
252 | } | ||
253 | |||
254 | static int obj_type(struct inode *inode, level_t level) | ||
255 | { | ||
256 | if (level == 0) { | ||
257 | if (S_ISDIR(inode->i_mode)) | ||
258 | return OBJ_DENTRY; | ||
259 | if (inode->i_ino == LOGFS_INO_MASTER) | ||
260 | return OBJ_INODE; | ||
261 | } | ||
262 | return OBJ_BLOCK; | ||
263 | } | ||
264 | |||
265 | static int obj_len(struct super_block *sb, int obj_type) | ||
266 | { | ||
267 | switch (obj_type) { | ||
268 | case OBJ_DENTRY: | ||
269 | return sizeof(struct logfs_disk_dentry); | ||
270 | case OBJ_INODE: | ||
271 | return sizeof(struct logfs_disk_inode); | ||
272 | case OBJ_BLOCK: | ||
273 | return sb->s_blocksize; | ||
274 | default: | ||
275 | BUG(); | ||
276 | } | ||
277 | } | ||
278 | |||
279 | static int __logfs_segment_write(struct inode *inode, void *buf, | ||
280 | struct logfs_shadow *shadow, int type, int len, int compr) | ||
281 | { | ||
282 | struct logfs_area *area; | ||
283 | struct super_block *sb = inode->i_sb; | ||
284 | s64 ofs; | ||
285 | struct logfs_object_header h; | ||
286 | int acc_len; | ||
287 | |||
288 | if (shadow->gc_level == 0) | ||
289 | acc_len = len; | ||
290 | else | ||
291 | acc_len = obj_len(sb, type); | ||
292 | |||
293 | area = get_area(sb, shadow->gc_level); | ||
294 | ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE); | ||
295 | LOGFS_BUG_ON(ofs <= 0, sb); | ||
296 | /* | ||
297 | * Order is important. logfs_get_free_bytes(), by modifying the | ||
298 | * segment file, may modify the content of the very page we're about | ||
299 | * to write now. Which is fine, as long as the calculated crc and | ||
300 | * written data still match. So do the modifications _before_ | ||
301 | * calculating the crc. | ||
302 | */ | ||
303 | |||
304 | h.len = cpu_to_be16(len); | ||
305 | h.type = type; | ||
306 | h.compr = compr; | ||
307 | h.ino = cpu_to_be64(inode->i_ino); | ||
308 | h.bix = cpu_to_be64(shadow->bix); | ||
309 | h.crc = logfs_crc32(&h, sizeof(h) - 4, 4); | ||
310 | h.data_crc = logfs_crc32(buf, len, 0); | ||
311 | |||
312 | logfs_buf_write(area, ofs, &h, sizeof(h)); | ||
313 | logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len); | ||
314 | |||
315 | shadow->new_ofs = ofs; | ||
316 | shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE; | ||
317 | |||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | static s64 logfs_segment_write_compress(struct inode *inode, void *buf, | ||
322 | struct logfs_shadow *shadow, int type, int len) | ||
323 | { | ||
324 | struct super_block *sb = inode->i_sb; | ||
325 | void *compressor_buf = logfs_super(sb)->s_compressed_je; | ||
326 | ssize_t compr_len; | ||
327 | int ret; | ||
328 | |||
329 | mutex_lock(&logfs_super(sb)->s_journal_mutex); | ||
330 | compr_len = logfs_compress(buf, compressor_buf, len, len); | ||
331 | |||
332 | if (compr_len >= 0) { | ||
333 | ret = __logfs_segment_write(inode, compressor_buf, shadow, | ||
334 | type, compr_len, COMPR_ZLIB); | ||
335 | } else { | ||
336 | ret = __logfs_segment_write(inode, buf, shadow, type, len, | ||
337 | COMPR_NONE); | ||
338 | } | ||
339 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
340 | return ret; | ||
341 | } | ||
342 | |||
343 | /** | ||
344 | * logfs_segment_write - write data block to object store | ||
345 | * @inode: inode containing data | ||
346 | * | ||
347 | * Returns an errno or zero. | ||
348 | */ | ||
349 | int logfs_segment_write(struct inode *inode, struct page *page, | ||
350 | struct logfs_shadow *shadow) | ||
351 | { | ||
352 | struct super_block *sb = inode->i_sb; | ||
353 | struct logfs_super *super = logfs_super(sb); | ||
354 | int do_compress, type, len; | ||
355 | int ret; | ||
356 | void *buf; | ||
357 | |||
358 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; | ||
359 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); | ||
360 | do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED; | ||
361 | if (shadow->gc_level != 0) { | ||
362 | /* temporarily disable compression for indirect blocks */ | ||
363 | do_compress = 0; | ||
364 | } | ||
365 | |||
366 | type = obj_type(inode, shrink_level(shadow->gc_level)); | ||
367 | len = obj_len(sb, type); | ||
368 | buf = kmap(page); | ||
369 | if (do_compress) | ||
370 | ret = logfs_segment_write_compress(inode, buf, shadow, type, | ||
371 | len); | ||
372 | else | ||
373 | ret = __logfs_segment_write(inode, buf, shadow, type, len, | ||
374 | COMPR_NONE); | ||
375 | kunmap(page); | ||
376 | |||
377 | log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n", | ||
378 | shadow->ino, shadow->bix, shadow->gc_level, | ||
379 | shadow->old_ofs, shadow->new_ofs, | ||
380 | shadow->old_len, shadow->new_len); | ||
381 | /* this BUG_ON did catch a locking bug. useful */ | ||
382 | BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1))); | ||
383 | return ret; | ||
384 | } | ||
385 | |||
386 | int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf) | ||
387 | { | ||
388 | pgoff_t index = ofs >> PAGE_SHIFT; | ||
389 | struct page *page; | ||
390 | long offset = ofs & (PAGE_SIZE-1); | ||
391 | long copylen; | ||
392 | |||
393 | while (len) { | ||
394 | copylen = min((ulong)len, PAGE_SIZE - offset); | ||
395 | |||
396 | page = get_mapping_page(sb, index, 1); | ||
397 | if (IS_ERR(page)) | ||
398 | return PTR_ERR(page); | ||
399 | memcpy(buf, page_address(page) + offset, copylen); | ||
400 | page_cache_release(page); | ||
401 | |||
402 | buf += copylen; | ||
403 | len -= copylen; | ||
404 | offset = 0; | ||
405 | index++; | ||
406 | } | ||
407 | return 0; | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * The "position" of indirect blocks is ambiguous. It can be the position | ||
412 | * of any data block somewhere behind this indirect block. So we need to | ||
413 | * normalize the positions through logfs_block_mask() before comparing. | ||
414 | */ | ||
415 | static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level) | ||
416 | { | ||
417 | return (pos1 & logfs_block_mask(sb, level)) != | ||
418 | (pos2 & logfs_block_mask(sb, level)); | ||
419 | } | ||
420 | |||
#if 0
/*
 * Currently compiled out.  Reads a segment header from device offset @ofs
 * into @sh and verifies its crc.
 *
 * Returns 0 on success, the wbuf_read() error, or -EIO on crc mismatch.
 */
static int read_seg_header(struct super_block *sb, u64 ofs,
		struct logfs_segment_header *sh)
{
	__be32 crc;
	int err = wbuf_read(sb, ofs, sizeof(*sh), sh);

	if (err)
		return err;

	crc = logfs_crc32(sh, sizeof(*sh), 4);
	if (crc == sh->crc)
		return 0;

	printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
			"got %x\n", ofs, be32_to_cpu(sh->crc),
			be32_to_cpu(crc));
	return -EIO;
}
#endif
441 | |||
442 | static int read_obj_header(struct super_block *sb, u64 ofs, | ||
443 | struct logfs_object_header *oh) | ||
444 | { | ||
445 | __be32 crc; | ||
446 | int err; | ||
447 | |||
448 | err = wbuf_read(sb, ofs, sizeof(*oh), oh); | ||
449 | if (err) | ||
450 | return err; | ||
451 | crc = logfs_crc32(oh, sizeof(*oh) - 4, 4); | ||
452 | if (crc != oh->crc) { | ||
453 | printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " | ||
454 | "got %x\n", ofs, be32_to_cpu(oh->crc), | ||
455 | be32_to_cpu(crc)); | ||
456 | return -EIO; | ||
457 | } | ||
458 | return 0; | ||
459 | } | ||
460 | |||
461 | static void move_btree_to_page(struct inode *inode, struct page *page, | ||
462 | __be64 *data) | ||
463 | { | ||
464 | struct super_block *sb = inode->i_sb; | ||
465 | struct logfs_super *super = logfs_super(sb); | ||
466 | struct btree_head128 *head = &super->s_object_alias_tree; | ||
467 | struct logfs_block *block; | ||
468 | struct object_alias_item *item, *next; | ||
469 | |||
470 | if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS)) | ||
471 | return; | ||
472 | |||
473 | block = btree_remove128(head, inode->i_ino, page->index); | ||
474 | if (!block) | ||
475 | return; | ||
476 | |||
477 | log_blockmove("move_btree_to_page(%llx, %llx, %x)\n", | ||
478 | block->ino, block->bix, block->level); | ||
479 | list_for_each_entry_safe(item, next, &block->item_list, list) { | ||
480 | data[item->child_no] = item->val; | ||
481 | list_del(&item->list); | ||
482 | mempool_free(item, super->s_alias_pool); | ||
483 | } | ||
484 | block->page = page; | ||
485 | SetPagePrivate(page); | ||
486 | page->private = (unsigned long)block; | ||
487 | block->ops = &indirect_block_ops; | ||
488 | initialize_block_counters(page, block, data, 0); | ||
489 | } | ||
490 | |||
/*
 * Thin wrapper around find_next_bit().  It exists only to silence a false,
 * yet annoying gcc warning that otherwise sends the editor into bitops.h
 * on every recompile of this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	unsigned long bit = find_next_bit(addr, size, offset);

	return bit;
}
501 | |||
502 | void move_page_to_btree(struct page *page) | ||
503 | { | ||
504 | struct logfs_block *block = logfs_block(page); | ||
505 | struct super_block *sb = block->sb; | ||
506 | struct logfs_super *super = logfs_super(sb); | ||
507 | struct object_alias_item *item; | ||
508 | unsigned long pos; | ||
509 | __be64 *child; | ||
510 | int err; | ||
511 | |||
512 | if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) { | ||
513 | block->ops->free_block(sb, block); | ||
514 | return; | ||
515 | } | ||
516 | log_blockmove("move_page_to_btree(%llx, %llx, %x)\n", | ||
517 | block->ino, block->bix, block->level); | ||
518 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; | ||
519 | |||
520 | for (pos = 0; ; pos++) { | ||
521 | pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos); | ||
522 | if (pos >= LOGFS_BLOCK_FACTOR) | ||
523 | break; | ||
524 | |||
525 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); | ||
526 | BUG_ON(!item); /* mempool empty */ | ||
527 | memset(item, 0, sizeof(*item)); | ||
528 | |||
529 | child = kmap_atomic(page, KM_USER0); | ||
530 | item->val = child[pos]; | ||
531 | kunmap_atomic(child, KM_USER0); | ||
532 | item->child_no = pos; | ||
533 | list_add(&item->list, &block->item_list); | ||
534 | } | ||
535 | block->page = NULL; | ||
536 | ClearPagePrivate(page); | ||
537 | page->private = 0; | ||
538 | block->ops = &btree_block_ops; | ||
539 | err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, | ||
540 | block); | ||
541 | BUG_ON(err); /* mempool empty */ | ||
542 | ClearPageUptodate(page); | ||
543 | } | ||
544 | |||
545 | static int __logfs_segment_read(struct inode *inode, void *buf, | ||
546 | u64 ofs, u64 bix, level_t level) | ||
547 | { | ||
548 | struct super_block *sb = inode->i_sb; | ||
549 | void *compressor_buf = logfs_super(sb)->s_compressed_je; | ||
550 | struct logfs_object_header oh; | ||
551 | __be32 crc; | ||
552 | u16 len; | ||
553 | int err, block_len; | ||
554 | |||
555 | block_len = obj_len(sb, obj_type(inode, level)); | ||
556 | err = read_obj_header(sb, ofs, &oh); | ||
557 | if (err) | ||
558 | goto out_err; | ||
559 | |||
560 | err = -EIO; | ||
561 | if (be64_to_cpu(oh.ino) != inode->i_ino | ||
562 | || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) { | ||
563 | printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: " | ||
564 | "expected (%lx, %llx), got (%llx, %llx)\n", | ||
565 | ofs, inode->i_ino, bix, | ||
566 | be64_to_cpu(oh.ino), be64_to_cpu(oh.bix)); | ||
567 | goto out_err; | ||
568 | } | ||
569 | |||
570 | len = be16_to_cpu(oh.len); | ||
571 | |||
572 | switch (oh.compr) { | ||
573 | case COMPR_NONE: | ||
574 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf); | ||
575 | if (err) | ||
576 | goto out_err; | ||
577 | crc = logfs_crc32(buf, len, 0); | ||
578 | if (crc != oh.data_crc) { | ||
579 | printk(KERN_ERR"LOGFS: uncompressed data crc error at " | ||
580 | "%llx: expected %x, got %x\n", ofs, | ||
581 | be32_to_cpu(oh.data_crc), | ||
582 | be32_to_cpu(crc)); | ||
583 | goto out_err; | ||
584 | } | ||
585 | break; | ||
586 | case COMPR_ZLIB: | ||
587 | mutex_lock(&logfs_super(sb)->s_journal_mutex); | ||
588 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, | ||
589 | compressor_buf); | ||
590 | if (err) { | ||
591 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
592 | goto out_err; | ||
593 | } | ||
594 | crc = logfs_crc32(compressor_buf, len, 0); | ||
595 | if (crc != oh.data_crc) { | ||
596 | printk(KERN_ERR"LOGFS: compressed data crc error at " | ||
597 | "%llx: expected %x, got %x\n", ofs, | ||
598 | be32_to_cpu(oh.data_crc), | ||
599 | be32_to_cpu(crc)); | ||
600 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
601 | goto out_err; | ||
602 | } | ||
603 | err = logfs_uncompress(compressor_buf, buf, len, block_len); | ||
604 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
605 | if (err) { | ||
606 | printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs); | ||
607 | goto out_err; | ||
608 | } | ||
609 | break; | ||
610 | default: | ||
611 | LOGFS_BUG(sb); | ||
612 | err = -EIO; | ||
613 | goto out_err; | ||
614 | } | ||
615 | return 0; | ||
616 | |||
617 | out_err: | ||
618 | logfs_set_ro(sb); | ||
619 | printk(KERN_ERR"LOGFS: device is read-only now\n"); | ||
620 | LOGFS_BUG(sb); | ||
621 | return err; | ||
622 | } | ||
623 | |||
624 | /** | ||
625 | * logfs_segment_read - read data block from object store | ||
626 | * @inode: inode containing data | ||
627 | * @buf: data buffer | ||
628 | * @ofs: physical data offset | ||
629 | * @bix: block index | ||
630 | * @level: block level | ||
631 | * | ||
632 | * Returns 0 on success or a negative errno. | ||
633 | */ | ||
634 | int logfs_segment_read(struct inode *inode, struct page *page, | ||
635 | u64 ofs, u64 bix, level_t level) | ||
636 | { | ||
637 | int err; | ||
638 | void *buf; | ||
639 | |||
640 | if (PageUptodate(page)) | ||
641 | return 0; | ||
642 | |||
643 | ofs &= ~LOGFS_FULLY_POPULATED; | ||
644 | |||
645 | buf = kmap(page); | ||
646 | err = __logfs_segment_read(inode, buf, ofs, bix, level); | ||
647 | if (!err) { | ||
648 | move_btree_to_page(inode, page, buf); | ||
649 | SetPageUptodate(page); | ||
650 | } | ||
651 | kunmap(page); | ||
652 | log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n", | ||
653 | inode->i_ino, bix, level, ofs, err); | ||
654 | return err; | ||
655 | } | ||
656 | |||
657 | int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) | ||
658 | { | ||
659 | struct super_block *sb = inode->i_sb; | ||
660 | struct logfs_super *super = logfs_super(sb); | ||
661 | struct logfs_object_header h; | ||
662 | u16 len; | ||
663 | int err; | ||
664 | |||
665 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; | ||
666 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); | ||
667 | BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED); | ||
668 | if (!shadow->old_ofs) | ||
669 | return 0; | ||
670 | |||
671 | log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n", | ||
672 | shadow->ino, shadow->bix, shadow->gc_level, | ||
673 | shadow->old_ofs, shadow->new_ofs, | ||
674 | shadow->old_len, shadow->new_len); | ||
675 | err = read_obj_header(sb, shadow->old_ofs, &h); | ||
676 | LOGFS_BUG_ON(err, sb); | ||
677 | LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb); | ||
678 | LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix), | ||
679 | shrink_level(shadow->gc_level)), sb); | ||
680 | |||
681 | if (shadow->gc_level == 0) | ||
682 | len = be16_to_cpu(h.len); | ||
683 | else | ||
684 | len = obj_len(sb, h.type); | ||
685 | shadow->old_len = len + sizeof(h); | ||
686 | return 0; | ||
687 | } | ||
688 | |||
689 | void freeseg(struct super_block *sb, u32 segno) | ||
690 | { | ||
691 | struct logfs_super *super = logfs_super(sb); | ||
692 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | ||
693 | struct page *page; | ||
694 | u64 ofs, start, end; | ||
695 | |||
696 | start = dev_ofs(sb, segno, 0); | ||
697 | end = dev_ofs(sb, segno + 1, 0); | ||
698 | for (ofs = start; ofs < end; ofs += PAGE_SIZE) { | ||
699 | page = find_get_page(mapping, ofs >> PAGE_SHIFT); | ||
700 | if (!page) | ||
701 | continue; | ||
702 | ClearPagePrivate(page); | ||
703 | page_cache_release(page); | ||
704 | } | ||
705 | } | ||
706 | |||
707 | int logfs_open_area(struct logfs_area *area, size_t bytes) | ||
708 | { | ||
709 | struct super_block *sb = area->a_sb; | ||
710 | struct logfs_super *super = logfs_super(sb); | ||
711 | int err, closed = 0; | ||
712 | |||
713 | if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize) | ||
714 | return 0; | ||
715 | |||
716 | if (area->a_is_open) { | ||
717 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); | ||
718 | u32 len = super->s_segsize - area->a_written_bytes; | ||
719 | |||
720 | log_gc("logfs_close_area(%x)\n", area->a_segno); | ||
721 | pad_wbuf(area, 1); | ||
722 | super->s_devops->writeseg(area->a_sb, ofs, len); | ||
723 | freeseg(sb, area->a_segno); | ||
724 | closed = 1; | ||
725 | } | ||
726 | |||
727 | area->a_used_bytes = 0; | ||
728 | area->a_written_bytes = 0; | ||
729 | again: | ||
730 | area->a_ops->get_free_segment(area); | ||
731 | area->a_ops->get_erase_count(area); | ||
732 | |||
733 | log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level); | ||
734 | err = area->a_ops->erase_segment(area); | ||
735 | if (err) { | ||
736 | printk(KERN_WARNING "LogFS: Error erasing segment %x\n", | ||
737 | area->a_segno); | ||
738 | logfs_mark_segment_bad(sb, area->a_segno); | ||
739 | goto again; | ||
740 | } | ||
741 | area->a_is_open = 1; | ||
742 | return closed; | ||
743 | } | ||
744 | |||
745 | void logfs_sync_area(struct logfs_area *area) | ||
746 | { | ||
747 | struct super_block *sb = area->a_sb; | ||
748 | struct logfs_super *super = logfs_super(sb); | ||
749 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); | ||
750 | u32 len = (area->a_used_bytes - area->a_written_bytes); | ||
751 | |||
752 | if (super->s_writesize) | ||
753 | len &= ~(super->s_writesize - 1); | ||
754 | if (len == 0) | ||
755 | return; | ||
756 | pad_wbuf(area, 0); | ||
757 | super->s_devops->writeseg(sb, ofs, len); | ||
758 | area->a_written_bytes += len; | ||
759 | } | ||
760 | |||
761 | void logfs_sync_segments(struct super_block *sb) | ||
762 | { | ||
763 | struct logfs_super *super = logfs_super(sb); | ||
764 | int i; | ||
765 | |||
766 | for_each_area(i) | ||
767 | logfs_sync_area(super->s_area[i]); | ||
768 | } | ||
769 | |||
770 | /* | ||
771 | * Pick a free segment to be used for this area. Effectively takes a | ||
772 | * candidate from the free list (not really a candidate anymore). | ||
773 | */ | ||
774 | static void ostore_get_free_segment(struct logfs_area *area) | ||
775 | { | ||
776 | struct super_block *sb = area->a_sb; | ||
777 | struct logfs_super *super = logfs_super(sb); | ||
778 | |||
779 | if (super->s_free_list.count == 0) { | ||
780 | printk(KERN_ERR"LOGFS: ran out of free segments\n"); | ||
781 | LOGFS_BUG(sb); | ||
782 | } | ||
783 | |||
784 | area->a_segno = get_best_cand(sb, &super->s_free_list, NULL); | ||
785 | } | ||
786 | |||
787 | static void ostore_get_erase_count(struct logfs_area *area) | ||
788 | { | ||
789 | struct logfs_segment_entry se; | ||
790 | u32 ec_level; | ||
791 | |||
792 | logfs_get_segment_entry(area->a_sb, area->a_segno, &se); | ||
793 | BUG_ON(se.ec_level == cpu_to_be32(BADSEG) || | ||
794 | se.valid == cpu_to_be32(RESERVED)); | ||
795 | |||
796 | ec_level = be32_to_cpu(se.ec_level); | ||
797 | area->a_erase_count = (ec_level >> 4) + 1; | ||
798 | } | ||
799 | |||
800 | static int ostore_erase_segment(struct logfs_area *area) | ||
801 | { | ||
802 | struct super_block *sb = area->a_sb; | ||
803 | struct logfs_segment_header sh; | ||
804 | u64 ofs; | ||
805 | int err; | ||
806 | |||
807 | err = logfs_erase_segment(sb, area->a_segno, 0); | ||
808 | if (err) | ||
809 | return err; | ||
810 | |||
811 | sh.pad = 0; | ||
812 | sh.type = SEG_OSTORE; | ||
813 | sh.level = (__force u8)area->a_level; | ||
814 | sh.segno = cpu_to_be32(area->a_segno); | ||
815 | sh.ec = cpu_to_be32(area->a_erase_count); | ||
816 | sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); | ||
817 | sh.crc = logfs_crc32(&sh, sizeof(sh), 4); | ||
818 | |||
819 | logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, | ||
820 | area->a_level); | ||
821 | |||
822 | ofs = dev_ofs(sb, area->a_segno, 0); | ||
823 | area->a_used_bytes = sizeof(sh); | ||
824 | logfs_buf_write(area, ofs, &sh, sizeof(sh)); | ||
825 | return 0; | ||
826 | } | ||
827 | |||
828 | static const struct logfs_area_ops ostore_area_ops = { | ||
829 | .get_free_segment = ostore_get_free_segment, | ||
830 | .get_erase_count = ostore_get_erase_count, | ||
831 | .erase_segment = ostore_erase_segment, | ||
832 | }; | ||
833 | |||
834 | static void free_area(struct logfs_area *area) | ||
835 | { | ||
836 | if (area) | ||
837 | freeseg(area->a_sb, area->a_segno); | ||
838 | kfree(area); | ||
839 | } | ||
840 | |||
841 | static struct logfs_area *alloc_area(struct super_block *sb) | ||
842 | { | ||
843 | struct logfs_area *area; | ||
844 | |||
845 | area = kzalloc(sizeof(*area), GFP_KERNEL); | ||
846 | if (!area) | ||
847 | return NULL; | ||
848 | |||
849 | area->a_sb = sb; | ||
850 | return area; | ||
851 | } | ||
852 | |||
/*
 * invalidatepage operation of the mapping inode.  Reaching it is treated
 * as a bug.
 */
static void map_invalidatepage(struct page *page, unsigned long l)
{
	BUG();
}
857 | |||
858 | static int map_releasepage(struct page *page, gfp_t g) | ||
859 | { | ||
860 | /* Don't release these pages */ | ||
861 | return 0; | ||
862 | } | ||
863 | |||
864 | static const struct address_space_operations mapping_aops = { | ||
865 | .invalidatepage = map_invalidatepage, | ||
866 | .releasepage = map_releasepage, | ||
867 | .set_page_dirty = __set_page_dirty_nobuffers, | ||
868 | }; | ||
869 | |||
870 | int logfs_init_mapping(struct super_block *sb) | ||
871 | { | ||
872 | struct logfs_super *super = logfs_super(sb); | ||
873 | struct address_space *mapping; | ||
874 | struct inode *inode; | ||
875 | |||
876 | inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING); | ||
877 | if (IS_ERR(inode)) | ||
878 | return PTR_ERR(inode); | ||
879 | super->s_mapping_inode = inode; | ||
880 | mapping = inode->i_mapping; | ||
881 | mapping->a_ops = &mapping_aops; | ||
882 | /* Would it be possible to use __GFP_HIGHMEM as well? */ | ||
883 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | int logfs_init_areas(struct super_block *sb) | ||
888 | { | ||
889 | struct logfs_super *super = logfs_super(sb); | ||
890 | int i = -1; | ||
891 | |||
892 | super->s_alias_pool = mempool_create_kmalloc_pool(600, | ||
893 | sizeof(struct object_alias_item)); | ||
894 | if (!super->s_alias_pool) | ||
895 | return -ENOMEM; | ||
896 | |||
897 | super->s_journal_area = alloc_area(sb); | ||
898 | if (!super->s_journal_area) | ||
899 | goto err; | ||
900 | |||
901 | for_each_area(i) { | ||
902 | super->s_area[i] = alloc_area(sb); | ||
903 | if (!super->s_area[i]) | ||
904 | goto err; | ||
905 | super->s_area[i]->a_level = GC_LEVEL(i); | ||
906 | super->s_area[i]->a_ops = &ostore_area_ops; | ||
907 | } | ||
908 | btree_init_mempool128(&super->s_object_alias_tree, | ||
909 | super->s_btree_pool); | ||
910 | return 0; | ||
911 | |||
912 | err: | ||
913 | for (i--; i >= 0; i--) | ||
914 | free_area(super->s_area[i]); | ||
915 | free_area(super->s_journal_area); | ||
916 | logfs_mempool_destroy(super->s_alias_pool); | ||
917 | return -ENOMEM; | ||
918 | } | ||
919 | |||
920 | void logfs_cleanup_areas(struct super_block *sb) | ||
921 | { | ||
922 | struct logfs_super *super = logfs_super(sb); | ||
923 | int i; | ||
924 | |||
925 | btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); | ||
926 | for_each_area(i) | ||
927 | free_area(super->s_area[i]); | ||
928 | free_area(super->s_journal_area); | ||
929 | destroy_meta_inode(super->s_mapping_inode); | ||
930 | } | ||