diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-06 16:18:03 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-06 16:18:03 -0500 |
| commit | 66b89159c25a47d2177743526c61b5ada7acc39e (patch) | |
| tree | b092b859ca01d7544a666c95f940144b0ef3b35b /fs/logfs/segment.c | |
| parent | 87c7ae06cc50bcbcdcc60d64a959ca0b9b71f892 (diff) | |
| parent | c2f843f03d658e9ab2a1a455f2c1851fd6a869af (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/joern/logfs
* git://git.kernel.org/pub/scm/linux/kernel/git/joern/logfs:
[LogFS] Change magic number
[LogFS] Remove h_version field
[LogFS] Check feature flags
[LogFS] Only write journal if dirty
[LogFS] Fix bdev erases
[LogFS] Silence gcc
[LogFS] Prevent 64bit divisions in hash_index
[LogFS] Plug memory leak on error paths
[LogFS] Add MAINTAINERS entry
[LogFS] add new flash file system
Fixed up trivial conflict in lib/Kconfig, and a semantic conflict in
fs/logfs/inode.c introduced by write_inode() being changed to use
'writeback_control' by commit a9185b41a4f84971b930c519f0c63bd450c4810d
("pass writeback_control to ->write_inode")
Diffstat (limited to 'fs/logfs/segment.c')
| -rw-r--r-- | fs/logfs/segment.c | 927 |
1 files changed, 927 insertions, 0 deletions
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c new file mode 100644 index 000000000000..1a14f9910d55 --- /dev/null +++ b/fs/logfs/segment.c | |||
| @@ -0,0 +1,927 @@ | |||
| 1 | /* | ||
| 2 | * fs/logfs/segment.c - Handling the Object Store | ||
| 3 | * | ||
| 4 | * As should be obvious for Linux kernel code, license is GPLv2 | ||
| 5 | * | ||
| 6 | * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org> | ||
| 7 | * | ||
| 8 | * Object store or ostore makes up the complete device with exception of | ||
| 9 | * the superblock and journal areas. Apart from its own metadata it stores | ||
| 10 | * three kinds of objects: inodes, dentries and blocks, both data and indirect. | ||
| 11 | */ | ||
| 12 | #include "logfs.h" | ||
| 13 | |||
| 14 | static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) | ||
| 15 | { | ||
| 16 | struct logfs_super *super = logfs_super(sb); | ||
| 17 | struct btree_head32 *head = &super->s_reserved_segments; | ||
| 18 | int err; | ||
| 19 | |||
| 20 | err = btree_insert32(head, segno, (void *)1, GFP_NOFS); | ||
| 21 | if (err) | ||
| 22 | return err; | ||
| 23 | logfs_super(sb)->s_bad_segments++; | ||
| 24 | /* FIXME: write to journal */ | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase) | ||
| 29 | { | ||
| 30 | struct logfs_super *super = logfs_super(sb); | ||
| 31 | |||
| 32 | super->s_gec++; | ||
| 33 | |||
| 34 | return super->s_devops->erase(sb, (u64)segno << super->s_segshift, | ||
| 35 | super->s_segsize, ensure_erase); | ||
| 36 | } | ||
| 37 | |||
| 38 | static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes) | ||
| 39 | { | ||
| 40 | s32 ofs; | ||
| 41 | |||
| 42 | logfs_open_area(area, bytes); | ||
| 43 | |||
| 44 | ofs = area->a_used_bytes; | ||
| 45 | area->a_used_bytes += bytes; | ||
| 46 | BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize); | ||
| 47 | |||
| 48 | return dev_ofs(area->a_sb, area->a_segno, ofs); | ||
| 49 | } | ||
| 50 | |||
| 51 | static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, | ||
| 52 | int use_filler) | ||
| 53 | { | ||
| 54 | struct logfs_super *super = logfs_super(sb); | ||
| 55 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | ||
| 56 | filler_t *filler = super->s_devops->readpage; | ||
| 57 | struct page *page; | ||
| 58 | |||
| 59 | BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); | ||
| 60 | if (use_filler) | ||
| 61 | page = read_cache_page(mapping, index, filler, sb); | ||
| 62 | else { | ||
| 63 | page = find_or_create_page(mapping, index, GFP_NOFS); | ||
| 64 | unlock_page(page); | ||
| 65 | } | ||
| 66 | return page; | ||
| 67 | } | ||
| 68 | |||
| 69 | void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, | ||
| 70 | int use_filler) | ||
| 71 | { | ||
| 72 | pgoff_t index = ofs >> PAGE_SHIFT; | ||
| 73 | struct page *page; | ||
| 74 | long offset = ofs & (PAGE_SIZE-1); | ||
| 75 | long copylen; | ||
| 76 | |||
| 77 | /* Only logfs_wbuf_recover may use len==0 */ | ||
| 78 | BUG_ON(!len && !use_filler); | ||
| 79 | do { | ||
| 80 | copylen = min((ulong)len, PAGE_SIZE - offset); | ||
| 81 | |||
| 82 | page = get_mapping_page(area->a_sb, index, use_filler); | ||
| 83 | SetPageUptodate(page); | ||
| 84 | BUG_ON(!page); /* FIXME: reserve a pool */ | ||
| 85 | memcpy(page_address(page) + offset, buf, copylen); | ||
| 86 | SetPagePrivate(page); | ||
| 87 | page_cache_release(page); | ||
| 88 | |||
| 89 | buf += copylen; | ||
| 90 | len -= copylen; | ||
| 91 | offset = 0; | ||
| 92 | index++; | ||
| 93 | } while (len); | ||
| 94 | } | ||
| 95 | |||
| 96 | /* | ||
| 97 | * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. | ||
| 98 | */ | ||
| 99 | static void pad_wbuf(struct logfs_area *area, int final) | ||
| 100 | { | ||
| 101 | struct super_block *sb = area->a_sb; | ||
| 102 | struct logfs_super *super = logfs_super(sb); | ||
| 103 | struct page *page; | ||
| 104 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); | ||
| 105 | pgoff_t index = ofs >> PAGE_SHIFT; | ||
| 106 | long offset = ofs & (PAGE_SIZE-1); | ||
| 107 | u32 len = PAGE_SIZE - offset; | ||
| 108 | |||
| 109 | if (len == PAGE_SIZE) { | ||
| 110 | /* The math in this function can surely use some love */ | ||
| 111 | len = 0; | ||
| 112 | } | ||
| 113 | if (len) { | ||
| 114 | BUG_ON(area->a_used_bytes >= super->s_segsize); | ||
| 115 | |||
| 116 | page = get_mapping_page(area->a_sb, index, 0); | ||
| 117 | BUG_ON(!page); /* FIXME: reserve a pool */ | ||
| 118 | memset(page_address(page) + offset, 0xff, len); | ||
| 119 | SetPagePrivate(page); | ||
| 120 | page_cache_release(page); | ||
| 121 | } | ||
| 122 | |||
| 123 | if (!final) | ||
| 124 | return; | ||
| 125 | |||
| 126 | area->a_used_bytes += len; | ||
| 127 | for ( ; area->a_used_bytes < super->s_segsize; | ||
| 128 | area->a_used_bytes += PAGE_SIZE) { | ||
| 129 | /* Memset another page */ | ||
| 130 | index++; | ||
| 131 | page = get_mapping_page(area->a_sb, index, 0); | ||
| 132 | BUG_ON(!page); /* FIXME: reserve a pool */ | ||
| 133 | memset(page_address(page), 0xff, PAGE_SIZE); | ||
| 134 | SetPagePrivate(page); | ||
| 135 | page_cache_release(page); | ||
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | /* | ||
| 140 | * We have to be careful with the alias tree. Since lookup is done by bix, | ||
| 141 | * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with | ||
| 142 | * indirect blocks. So always use it through accessor functions. | ||
| 143 | */ | ||
| 144 | static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix, | ||
| 145 | level_t level) | ||
| 146 | { | ||
| 147 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; | ||
| 148 | pgoff_t index = logfs_pack_index(bix, level); | ||
| 149 | |||
| 150 | return btree_lookup128(head, ino, index); | ||
| 151 | } | ||
| 152 | |||
| 153 | static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix, | ||
| 154 | level_t level, void *val) | ||
| 155 | { | ||
| 156 | struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; | ||
| 157 | pgoff_t index = logfs_pack_index(bix, level); | ||
| 158 | |||
| 159 | return btree_insert128(head, ino, index, val, GFP_NOFS); | ||
| 160 | } | ||
| 161 | |||
| 162 | static int btree_write_alias(struct super_block *sb, struct logfs_block *block, | ||
| 163 | write_alias_t *write_one_alias) | ||
| 164 | { | ||
| 165 | struct object_alias_item *item; | ||
| 166 | int err; | ||
| 167 | |||
| 168 | list_for_each_entry(item, &block->item_list, list) { | ||
| 169 | err = write_alias_journal(sb, block->ino, block->bix, | ||
| 170 | block->level, item->child_no, item->val); | ||
| 171 | if (err) | ||
| 172 | return err; | ||
| 173 | } | ||
| 174 | return 0; | ||
| 175 | } | ||
| 176 | |||
| 177 | static gc_level_t btree_block_level(struct logfs_block *block) | ||
| 178 | { | ||
| 179 | return expand_level(block->ino, block->level); | ||
| 180 | } | ||
| 181 | |||
| 182 | static struct logfs_block_ops btree_block_ops = { | ||
| 183 | .write_block = btree_write_block, | ||
| 184 | .block_level = btree_block_level, | ||
| 185 | .free_block = __free_block, | ||
| 186 | .write_alias = btree_write_alias, | ||
| 187 | }; | ||
| 188 | |||
| 189 | int logfs_load_object_aliases(struct super_block *sb, | ||
| 190 | struct logfs_obj_alias *oa, int count) | ||
| 191 | { | ||
| 192 | struct logfs_super *super = logfs_super(sb); | ||
| 193 | struct logfs_block *block; | ||
| 194 | struct object_alias_item *item; | ||
| 195 | u64 ino, bix; | ||
| 196 | level_t level; | ||
| 197 | int i, err; | ||
| 198 | |||
| 199 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; | ||
| 200 | count /= sizeof(*oa); | ||
| 201 | for (i = 0; i < count; i++) { | ||
| 202 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); | ||
| 203 | if (!item) | ||
| 204 | return -ENOMEM; | ||
| 205 | memset(item, 0, sizeof(*item)); | ||
| 206 | |||
| 207 | super->s_no_object_aliases++; | ||
| 208 | item->val = oa[i].val; | ||
| 209 | item->child_no = be16_to_cpu(oa[i].child_no); | ||
| 210 | |||
| 211 | ino = be64_to_cpu(oa[i].ino); | ||
| 212 | bix = be64_to_cpu(oa[i].bix); | ||
| 213 | level = LEVEL(oa[i].level); | ||
| 214 | |||
| 215 | log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n", | ||
| 216 | ino, bix, level, item->child_no, | ||
| 217 | be64_to_cpu(item->val)); | ||
| 218 | block = alias_tree_lookup(sb, ino, bix, level); | ||
| 219 | if (!block) { | ||
| 220 | block = __alloc_block(sb, ino, bix, level); | ||
| 221 | block->ops = &btree_block_ops; | ||
| 222 | err = alias_tree_insert(sb, ino, bix, level, block); | ||
| 223 | BUG_ON(err); /* mempool empty */ | ||
| 224 | } | ||
| 225 | if (test_and_set_bit(item->child_no, block->alias_map)) { | ||
| 226 | printk(KERN_ERR"LogFS: Alias collision detected\n"); | ||
| 227 | return -EIO; | ||
| 228 | } | ||
| 229 | list_move_tail(&block->alias_list, &super->s_object_alias); | ||
| 230 | list_add(&item->list, &block->item_list); | ||
| 231 | } | ||
| 232 | return 0; | ||
| 233 | } | ||
| 234 | |||
| 235 | static void kill_alias(void *_block, unsigned long ignore0, | ||
| 236 | u64 ignore1, u64 ignore2, size_t ignore3) | ||
| 237 | { | ||
| 238 | struct logfs_block *block = _block; | ||
| 239 | struct super_block *sb = block->sb; | ||
| 240 | struct logfs_super *super = logfs_super(sb); | ||
| 241 | struct object_alias_item *item; | ||
| 242 | |||
| 243 | while (!list_empty(&block->item_list)) { | ||
| 244 | item = list_entry(block->item_list.next, typeof(*item), list); | ||
| 245 | list_del(&item->list); | ||
| 246 | mempool_free(item, super->s_alias_pool); | ||
| 247 | } | ||
| 248 | block->ops->free_block(sb, block); | ||
| 249 | } | ||
| 250 | |||
| 251 | static int obj_type(struct inode *inode, level_t level) | ||
| 252 | { | ||
| 253 | if (level == 0) { | ||
| 254 | if (S_ISDIR(inode->i_mode)) | ||
| 255 | return OBJ_DENTRY; | ||
| 256 | if (inode->i_ino == LOGFS_INO_MASTER) | ||
| 257 | return OBJ_INODE; | ||
| 258 | } | ||
| 259 | return OBJ_BLOCK; | ||
| 260 | } | ||
| 261 | |||
| 262 | static int obj_len(struct super_block *sb, int obj_type) | ||
| 263 | { | ||
| 264 | switch (obj_type) { | ||
| 265 | case OBJ_DENTRY: | ||
| 266 | return sizeof(struct logfs_disk_dentry); | ||
| 267 | case OBJ_INODE: | ||
| 268 | return sizeof(struct logfs_disk_inode); | ||
| 269 | case OBJ_BLOCK: | ||
| 270 | return sb->s_blocksize; | ||
| 271 | default: | ||
| 272 | BUG(); | ||
| 273 | } | ||
| 274 | } | ||
| 275 | |||
| 276 | static int __logfs_segment_write(struct inode *inode, void *buf, | ||
| 277 | struct logfs_shadow *shadow, int type, int len, int compr) | ||
| 278 | { | ||
| 279 | struct logfs_area *area; | ||
| 280 | struct super_block *sb = inode->i_sb; | ||
| 281 | s64 ofs; | ||
| 282 | struct logfs_object_header h; | ||
| 283 | int acc_len; | ||
| 284 | |||
| 285 | if (shadow->gc_level == 0) | ||
| 286 | acc_len = len; | ||
| 287 | else | ||
| 288 | acc_len = obj_len(sb, type); | ||
| 289 | |||
| 290 | area = get_area(sb, shadow->gc_level); | ||
| 291 | ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE); | ||
| 292 | LOGFS_BUG_ON(ofs <= 0, sb); | ||
| 293 | /* | ||
| 294 | * Order is important. logfs_get_free_bytes(), by modifying the | ||
| 295 | * segment file, may modify the content of the very page we're about | ||
| 296 | * to write now. Which is fine, as long as the calculated crc and | ||
| 297 | * written data still match. So do the modifications _before_ | ||
| 298 | * calculating the crc. | ||
| 299 | */ | ||
| 300 | |||
| 301 | h.len = cpu_to_be16(len); | ||
| 302 | h.type = type; | ||
| 303 | h.compr = compr; | ||
| 304 | h.ino = cpu_to_be64(inode->i_ino); | ||
| 305 | h.bix = cpu_to_be64(shadow->bix); | ||
| 306 | h.crc = logfs_crc32(&h, sizeof(h) - 4, 4); | ||
| 307 | h.data_crc = logfs_crc32(buf, len, 0); | ||
| 308 | |||
| 309 | logfs_buf_write(area, ofs, &h, sizeof(h)); | ||
| 310 | logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len); | ||
| 311 | |||
| 312 | shadow->new_ofs = ofs; | ||
| 313 | shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE; | ||
| 314 | |||
| 315 | return 0; | ||
| 316 | } | ||
| 317 | |||
| 318 | static s64 logfs_segment_write_compress(struct inode *inode, void *buf, | ||
| 319 | struct logfs_shadow *shadow, int type, int len) | ||
| 320 | { | ||
| 321 | struct super_block *sb = inode->i_sb; | ||
| 322 | void *compressor_buf = logfs_super(sb)->s_compressed_je; | ||
| 323 | ssize_t compr_len; | ||
| 324 | int ret; | ||
| 325 | |||
| 326 | mutex_lock(&logfs_super(sb)->s_journal_mutex); | ||
| 327 | compr_len = logfs_compress(buf, compressor_buf, len, len); | ||
| 328 | |||
| 329 | if (compr_len >= 0) { | ||
| 330 | ret = __logfs_segment_write(inode, compressor_buf, shadow, | ||
| 331 | type, compr_len, COMPR_ZLIB); | ||
| 332 | } else { | ||
| 333 | ret = __logfs_segment_write(inode, buf, shadow, type, len, | ||
| 334 | COMPR_NONE); | ||
| 335 | } | ||
| 336 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
| 337 | return ret; | ||
| 338 | } | ||
| 339 | |||
| 340 | /** | ||
| 341 | * logfs_segment_write - write data block to object store | ||
| 342 | * @inode: inode containing data | ||
| 343 | * | ||
| 344 | * Returns an errno or zero. | ||
| 345 | */ | ||
| 346 | int logfs_segment_write(struct inode *inode, struct page *page, | ||
| 347 | struct logfs_shadow *shadow) | ||
| 348 | { | ||
| 349 | struct super_block *sb = inode->i_sb; | ||
| 350 | struct logfs_super *super = logfs_super(sb); | ||
| 351 | int do_compress, type, len; | ||
| 352 | int ret; | ||
| 353 | void *buf; | ||
| 354 | |||
| 355 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; | ||
| 356 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); | ||
| 357 | do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED; | ||
| 358 | if (shadow->gc_level != 0) { | ||
| 359 | /* temporarily disable compression for indirect blocks */ | ||
| 360 | do_compress = 0; | ||
| 361 | } | ||
| 362 | |||
| 363 | type = obj_type(inode, shrink_level(shadow->gc_level)); | ||
| 364 | len = obj_len(sb, type); | ||
| 365 | buf = kmap(page); | ||
| 366 | if (do_compress) | ||
| 367 | ret = logfs_segment_write_compress(inode, buf, shadow, type, | ||
| 368 | len); | ||
| 369 | else | ||
| 370 | ret = __logfs_segment_write(inode, buf, shadow, type, len, | ||
| 371 | COMPR_NONE); | ||
| 372 | kunmap(page); | ||
| 373 | |||
| 374 | log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n", | ||
| 375 | shadow->ino, shadow->bix, shadow->gc_level, | ||
| 376 | shadow->old_ofs, shadow->new_ofs, | ||
| 377 | shadow->old_len, shadow->new_len); | ||
| 378 | /* this BUG_ON did catch a locking bug. useful */ | ||
| 379 | BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1))); | ||
| 380 | return ret; | ||
| 381 | } | ||
| 382 | |||
| 383 | int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf) | ||
| 384 | { | ||
| 385 | pgoff_t index = ofs >> PAGE_SHIFT; | ||
| 386 | struct page *page; | ||
| 387 | long offset = ofs & (PAGE_SIZE-1); | ||
| 388 | long copylen; | ||
| 389 | |||
| 390 | while (len) { | ||
| 391 | copylen = min((ulong)len, PAGE_SIZE - offset); | ||
| 392 | |||
| 393 | page = get_mapping_page(sb, index, 1); | ||
| 394 | if (IS_ERR(page)) | ||
| 395 | return PTR_ERR(page); | ||
| 396 | memcpy(buf, page_address(page) + offset, copylen); | ||
| 397 | page_cache_release(page); | ||
| 398 | |||
| 399 | buf += copylen; | ||
| 400 | len -= copylen; | ||
| 401 | offset = 0; | ||
| 402 | index++; | ||
| 403 | } | ||
| 404 | return 0; | ||
| 405 | } | ||
| 406 | |||
| 407 | /* | ||
| 408 | * The "position" of indirect blocks is ambiguous. It can be the position | ||
| 409 | * of any data block somewhere behind this indirect block. So we need to | ||
| 410 | * normalize the positions through logfs_block_mask() before comparing. | ||
| 411 | */ | ||
| 412 | static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level) | ||
| 413 | { | ||
| 414 | return (pos1 & logfs_block_mask(sb, level)) != | ||
| 415 | (pos2 & logfs_block_mask(sb, level)); | ||
| 416 | } | ||
| 417 | |||
#if 0
/*
 * Currently compiled out.  Read the segment header at @ofs into @sh and
 * verify its crc.  Returns 0, the wbuf_read() error, or -EIO on a crc
 * mismatch.
 */
static int read_seg_header(struct super_block *sb, u64 ofs,
		struct logfs_segment_header *sh)
{
	__be32 crc;
	int err;

	err = wbuf_read(sb, ofs, sizeof(*sh), sh);
	if (err)
		return err;

	crc = logfs_crc32(sh, sizeof(*sh), 4);
	if (crc == sh->crc)
		return 0;

	printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
			"got %x\n", ofs, be32_to_cpu(sh->crc),
			be32_to_cpu(crc));
	return -EIO;
}
#endif
| 438 | |||
| 439 | static int read_obj_header(struct super_block *sb, u64 ofs, | ||
| 440 | struct logfs_object_header *oh) | ||
| 441 | { | ||
| 442 | __be32 crc; | ||
| 443 | int err; | ||
| 444 | |||
| 445 | err = wbuf_read(sb, ofs, sizeof(*oh), oh); | ||
| 446 | if (err) | ||
| 447 | return err; | ||
| 448 | crc = logfs_crc32(oh, sizeof(*oh) - 4, 4); | ||
| 449 | if (crc != oh->crc) { | ||
| 450 | printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " | ||
| 451 | "got %x\n", ofs, be32_to_cpu(oh->crc), | ||
| 452 | be32_to_cpu(crc)); | ||
| 453 | return -EIO; | ||
| 454 | } | ||
| 455 | return 0; | ||
| 456 | } | ||
| 457 | |||
| 458 | static void move_btree_to_page(struct inode *inode, struct page *page, | ||
| 459 | __be64 *data) | ||
| 460 | { | ||
| 461 | struct super_block *sb = inode->i_sb; | ||
| 462 | struct logfs_super *super = logfs_super(sb); | ||
| 463 | struct btree_head128 *head = &super->s_object_alias_tree; | ||
| 464 | struct logfs_block *block; | ||
| 465 | struct object_alias_item *item, *next; | ||
| 466 | |||
| 467 | if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS)) | ||
| 468 | return; | ||
| 469 | |||
| 470 | block = btree_remove128(head, inode->i_ino, page->index); | ||
| 471 | if (!block) | ||
| 472 | return; | ||
| 473 | |||
| 474 | log_blockmove("move_btree_to_page(%llx, %llx, %x)\n", | ||
| 475 | block->ino, block->bix, block->level); | ||
| 476 | list_for_each_entry_safe(item, next, &block->item_list, list) { | ||
| 477 | data[item->child_no] = item->val; | ||
| 478 | list_del(&item->list); | ||
| 479 | mempool_free(item, super->s_alias_pool); | ||
| 480 | } | ||
| 481 | block->page = page; | ||
| 482 | SetPagePrivate(page); | ||
| 483 | page->private = (unsigned long)block; | ||
| 484 | block->ops = &indirect_block_ops; | ||
| 485 | initialize_block_counters(page, block, data, 0); | ||
| 486 | } | ||
| 487 | |||
/*
 * Thin wrapper around find_next_bit().  It exists solely to silence a
 * bogus but irritating gcc warning - having the editor jump into bitops.h
 * on every recompile of this file gets old quickly.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	unsigned long bit = find_next_bit(addr, size, offset);

	return bit;
}
| 498 | |||
| 499 | void move_page_to_btree(struct page *page) | ||
| 500 | { | ||
| 501 | struct logfs_block *block = logfs_block(page); | ||
| 502 | struct super_block *sb = block->sb; | ||
| 503 | struct logfs_super *super = logfs_super(sb); | ||
| 504 | struct object_alias_item *item; | ||
| 505 | unsigned long pos; | ||
| 506 | __be64 *child; | ||
| 507 | int err; | ||
| 508 | |||
| 509 | if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) { | ||
| 510 | block->ops->free_block(sb, block); | ||
| 511 | return; | ||
| 512 | } | ||
| 513 | log_blockmove("move_page_to_btree(%llx, %llx, %x)\n", | ||
| 514 | block->ino, block->bix, block->level); | ||
| 515 | super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; | ||
| 516 | |||
| 517 | for (pos = 0; ; pos++) { | ||
| 518 | pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos); | ||
| 519 | if (pos >= LOGFS_BLOCK_FACTOR) | ||
| 520 | break; | ||
| 521 | |||
| 522 | item = mempool_alloc(super->s_alias_pool, GFP_NOFS); | ||
| 523 | BUG_ON(!item); /* mempool empty */ | ||
| 524 | memset(item, 0, sizeof(*item)); | ||
| 525 | |||
| 526 | child = kmap_atomic(page, KM_USER0); | ||
| 527 | item->val = child[pos]; | ||
| 528 | kunmap_atomic(child, KM_USER0); | ||
| 529 | item->child_no = pos; | ||
| 530 | list_add(&item->list, &block->item_list); | ||
| 531 | } | ||
| 532 | block->page = NULL; | ||
| 533 | ClearPagePrivate(page); | ||
| 534 | page->private = 0; | ||
| 535 | block->ops = &btree_block_ops; | ||
| 536 | err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, | ||
| 537 | block); | ||
| 538 | BUG_ON(err); /* mempool empty */ | ||
| 539 | ClearPageUptodate(page); | ||
| 540 | } | ||
| 541 | |||
| 542 | static int __logfs_segment_read(struct inode *inode, void *buf, | ||
| 543 | u64 ofs, u64 bix, level_t level) | ||
| 544 | { | ||
| 545 | struct super_block *sb = inode->i_sb; | ||
| 546 | void *compressor_buf = logfs_super(sb)->s_compressed_je; | ||
| 547 | struct logfs_object_header oh; | ||
| 548 | __be32 crc; | ||
| 549 | u16 len; | ||
| 550 | int err, block_len; | ||
| 551 | |||
| 552 | block_len = obj_len(sb, obj_type(inode, level)); | ||
| 553 | err = read_obj_header(sb, ofs, &oh); | ||
| 554 | if (err) | ||
| 555 | goto out_err; | ||
| 556 | |||
| 557 | err = -EIO; | ||
| 558 | if (be64_to_cpu(oh.ino) != inode->i_ino | ||
| 559 | || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) { | ||
| 560 | printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: " | ||
| 561 | "expected (%lx, %llx), got (%llx, %llx)\n", | ||
| 562 | ofs, inode->i_ino, bix, | ||
| 563 | be64_to_cpu(oh.ino), be64_to_cpu(oh.bix)); | ||
| 564 | goto out_err; | ||
| 565 | } | ||
| 566 | |||
| 567 | len = be16_to_cpu(oh.len); | ||
| 568 | |||
| 569 | switch (oh.compr) { | ||
| 570 | case COMPR_NONE: | ||
| 571 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf); | ||
| 572 | if (err) | ||
| 573 | goto out_err; | ||
| 574 | crc = logfs_crc32(buf, len, 0); | ||
| 575 | if (crc != oh.data_crc) { | ||
| 576 | printk(KERN_ERR"LOGFS: uncompressed data crc error at " | ||
| 577 | "%llx: expected %x, got %x\n", ofs, | ||
| 578 | be32_to_cpu(oh.data_crc), | ||
| 579 | be32_to_cpu(crc)); | ||
| 580 | goto out_err; | ||
| 581 | } | ||
| 582 | break; | ||
| 583 | case COMPR_ZLIB: | ||
| 584 | mutex_lock(&logfs_super(sb)->s_journal_mutex); | ||
| 585 | err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, | ||
| 586 | compressor_buf); | ||
| 587 | if (err) { | ||
| 588 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
| 589 | goto out_err; | ||
| 590 | } | ||
| 591 | crc = logfs_crc32(compressor_buf, len, 0); | ||
| 592 | if (crc != oh.data_crc) { | ||
| 593 | printk(KERN_ERR"LOGFS: compressed data crc error at " | ||
| 594 | "%llx: expected %x, got %x\n", ofs, | ||
| 595 | be32_to_cpu(oh.data_crc), | ||
| 596 | be32_to_cpu(crc)); | ||
| 597 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
| 598 | goto out_err; | ||
| 599 | } | ||
| 600 | err = logfs_uncompress(compressor_buf, buf, len, block_len); | ||
| 601 | mutex_unlock(&logfs_super(sb)->s_journal_mutex); | ||
| 602 | if (err) { | ||
| 603 | printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs); | ||
| 604 | goto out_err; | ||
| 605 | } | ||
| 606 | break; | ||
| 607 | default: | ||
| 608 | LOGFS_BUG(sb); | ||
| 609 | err = -EIO; | ||
| 610 | goto out_err; | ||
| 611 | } | ||
| 612 | return 0; | ||
| 613 | |||
| 614 | out_err: | ||
| 615 | logfs_set_ro(sb); | ||
| 616 | printk(KERN_ERR"LOGFS: device is read-only now\n"); | ||
| 617 | LOGFS_BUG(sb); | ||
| 618 | return err; | ||
| 619 | } | ||
| 620 | |||
| 621 | /** | ||
| 622 | * logfs_segment_read - read data block from object store | ||
| 623 | * @inode: inode containing data | ||
| 624 | * @buf: data buffer | ||
| 625 | * @ofs: physical data offset | ||
| 626 | * @bix: block index | ||
| 627 | * @level: block level | ||
| 628 | * | ||
| 629 | * Returns 0 on success or a negative errno. | ||
| 630 | */ | ||
| 631 | int logfs_segment_read(struct inode *inode, struct page *page, | ||
| 632 | u64 ofs, u64 bix, level_t level) | ||
| 633 | { | ||
| 634 | int err; | ||
| 635 | void *buf; | ||
| 636 | |||
| 637 | if (PageUptodate(page)) | ||
| 638 | return 0; | ||
| 639 | |||
| 640 | ofs &= ~LOGFS_FULLY_POPULATED; | ||
| 641 | |||
| 642 | buf = kmap(page); | ||
| 643 | err = __logfs_segment_read(inode, buf, ofs, bix, level); | ||
| 644 | if (!err) { | ||
| 645 | move_btree_to_page(inode, page, buf); | ||
| 646 | SetPageUptodate(page); | ||
| 647 | } | ||
| 648 | kunmap(page); | ||
| 649 | log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n", | ||
| 650 | inode->i_ino, bix, level, ofs, err); | ||
| 651 | return err; | ||
| 652 | } | ||
| 653 | |||
| 654 | int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) | ||
| 655 | { | ||
| 656 | struct super_block *sb = inode->i_sb; | ||
| 657 | struct logfs_super *super = logfs_super(sb); | ||
| 658 | struct logfs_object_header h; | ||
| 659 | u16 len; | ||
| 660 | int err; | ||
| 661 | |||
| 662 | super->s_flags |= LOGFS_SB_FLAG_DIRTY; | ||
| 663 | BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN); | ||
| 664 | BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED); | ||
| 665 | if (!shadow->old_ofs) | ||
| 666 | return 0; | ||
| 667 | |||
| 668 | log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n", | ||
| 669 | shadow->ino, shadow->bix, shadow->gc_level, | ||
| 670 | shadow->old_ofs, shadow->new_ofs, | ||
| 671 | shadow->old_len, shadow->new_len); | ||
| 672 | err = read_obj_header(sb, shadow->old_ofs, &h); | ||
| 673 | LOGFS_BUG_ON(err, sb); | ||
| 674 | LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb); | ||
| 675 | LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix), | ||
| 676 | shrink_level(shadow->gc_level)), sb); | ||
| 677 | |||
| 678 | if (shadow->gc_level == 0) | ||
| 679 | len = be16_to_cpu(h.len); | ||
| 680 | else | ||
| 681 | len = obj_len(sb, h.type); | ||
| 682 | shadow->old_len = len + sizeof(h); | ||
| 683 | return 0; | ||
| 684 | } | ||
| 685 | |||
| 686 | static void freeseg(struct super_block *sb, u32 segno) | ||
| 687 | { | ||
| 688 | struct logfs_super *super = logfs_super(sb); | ||
| 689 | struct address_space *mapping = super->s_mapping_inode->i_mapping; | ||
| 690 | struct page *page; | ||
| 691 | u64 ofs, start, end; | ||
| 692 | |||
| 693 | start = dev_ofs(sb, segno, 0); | ||
| 694 | end = dev_ofs(sb, segno + 1, 0); | ||
| 695 | for (ofs = start; ofs < end; ofs += PAGE_SIZE) { | ||
| 696 | page = find_get_page(mapping, ofs >> PAGE_SHIFT); | ||
| 697 | if (!page) | ||
| 698 | continue; | ||
| 699 | ClearPagePrivate(page); | ||
| 700 | page_cache_release(page); | ||
| 701 | } | ||
| 702 | } | ||
| 703 | |||
| 704 | int logfs_open_area(struct logfs_area *area, size_t bytes) | ||
| 705 | { | ||
| 706 | struct super_block *sb = area->a_sb; | ||
| 707 | struct logfs_super *super = logfs_super(sb); | ||
| 708 | int err, closed = 0; | ||
| 709 | |||
| 710 | if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize) | ||
| 711 | return 0; | ||
| 712 | |||
| 713 | if (area->a_is_open) { | ||
| 714 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); | ||
| 715 | u32 len = super->s_segsize - area->a_written_bytes; | ||
| 716 | |||
| 717 | log_gc("logfs_close_area(%x)\n", area->a_segno); | ||
| 718 | pad_wbuf(area, 1); | ||
| 719 | super->s_devops->writeseg(area->a_sb, ofs, len); | ||
| 720 | freeseg(sb, area->a_segno); | ||
| 721 | closed = 1; | ||
| 722 | } | ||
| 723 | |||
| 724 | area->a_used_bytes = 0; | ||
| 725 | area->a_written_bytes = 0; | ||
| 726 | again: | ||
| 727 | area->a_ops->get_free_segment(area); | ||
| 728 | area->a_ops->get_erase_count(area); | ||
| 729 | |||
| 730 | log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level); | ||
| 731 | err = area->a_ops->erase_segment(area); | ||
| 732 | if (err) { | ||
| 733 | printk(KERN_WARNING "LogFS: Error erasing segment %x\n", | ||
| 734 | area->a_segno); | ||
| 735 | logfs_mark_segment_bad(sb, area->a_segno); | ||
| 736 | goto again; | ||
| 737 | } | ||
| 738 | area->a_is_open = 1; | ||
| 739 | return closed; | ||
| 740 | } | ||
| 741 | |||
| 742 | void logfs_sync_area(struct logfs_area *area) | ||
| 743 | { | ||
| 744 | struct super_block *sb = area->a_sb; | ||
| 745 | struct logfs_super *super = logfs_super(sb); | ||
| 746 | u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); | ||
| 747 | u32 len = (area->a_used_bytes - area->a_written_bytes); | ||
| 748 | |||
| 749 | if (super->s_writesize) | ||
| 750 | len &= ~(super->s_writesize - 1); | ||
| 751 | if (len == 0) | ||
| 752 | return; | ||
| 753 | pad_wbuf(area, 0); | ||
| 754 | super->s_devops->writeseg(sb, ofs, len); | ||
| 755 | area->a_written_bytes += len; | ||
| 756 | } | ||
| 757 | |||
| 758 | void logfs_sync_segments(struct super_block *sb) | ||
| 759 | { | ||
| 760 | struct logfs_super *super = logfs_super(sb); | ||
| 761 | int i; | ||
| 762 | |||
| 763 | for_each_area(i) | ||
| 764 | logfs_sync_area(super->s_area[i]); | ||
| 765 | } | ||
| 766 | |||
| 767 | /* | ||
| 768 | * Pick a free segment to be used for this area. Effectively takes a | ||
| 769 | * candidate from the free list (not really a candidate anymore). | ||
| 770 | */ | ||
| 771 | static void ostore_get_free_segment(struct logfs_area *area) | ||
| 772 | { | ||
| 773 | struct super_block *sb = area->a_sb; | ||
| 774 | struct logfs_super *super = logfs_super(sb); | ||
| 775 | |||
| 776 | if (super->s_free_list.count == 0) { | ||
| 777 | printk(KERN_ERR"LOGFS: ran out of free segments\n"); | ||
| 778 | LOGFS_BUG(sb); | ||
| 779 | } | ||
| 780 | |||
| 781 | area->a_segno = get_best_cand(sb, &super->s_free_list, NULL); | ||
| 782 | } | ||
| 783 | |||
| 784 | static void ostore_get_erase_count(struct logfs_area *area) | ||
| 785 | { | ||
| 786 | struct logfs_segment_entry se; | ||
| 787 | u32 ec_level; | ||
| 788 | |||
| 789 | logfs_get_segment_entry(area->a_sb, area->a_segno, &se); | ||
| 790 | BUG_ON(se.ec_level == cpu_to_be32(BADSEG) || | ||
| 791 | se.valid == cpu_to_be32(RESERVED)); | ||
| 792 | |||
| 793 | ec_level = be32_to_cpu(se.ec_level); | ||
| 794 | area->a_erase_count = (ec_level >> 4) + 1; | ||
| 795 | } | ||
| 796 | |||
| 797 | static int ostore_erase_segment(struct logfs_area *area) | ||
| 798 | { | ||
| 799 | struct super_block *sb = area->a_sb; | ||
| 800 | struct logfs_segment_header sh; | ||
| 801 | u64 ofs; | ||
| 802 | int err; | ||
| 803 | |||
| 804 | err = logfs_erase_segment(sb, area->a_segno, 0); | ||
| 805 | if (err) | ||
| 806 | return err; | ||
| 807 | |||
| 808 | sh.pad = 0; | ||
| 809 | sh.type = SEG_OSTORE; | ||
| 810 | sh.level = (__force u8)area->a_level; | ||
| 811 | sh.segno = cpu_to_be32(area->a_segno); | ||
| 812 | sh.ec = cpu_to_be32(area->a_erase_count); | ||
| 813 | sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); | ||
| 814 | sh.crc = logfs_crc32(&sh, sizeof(sh), 4); | ||
| 815 | |||
| 816 | logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, | ||
| 817 | area->a_level); | ||
| 818 | |||
| 819 | ofs = dev_ofs(sb, area->a_segno, 0); | ||
| 820 | area->a_used_bytes = sizeof(sh); | ||
| 821 | logfs_buf_write(area, ofs, &sh, sizeof(sh)); | ||
| 822 | return 0; | ||
| 823 | } | ||
| 824 | |||
| 825 | static const struct logfs_area_ops ostore_area_ops = { | ||
| 826 | .get_free_segment = ostore_get_free_segment, | ||
| 827 | .get_erase_count = ostore_get_erase_count, | ||
| 828 | .erase_segment = ostore_erase_segment, | ||
| 829 | }; | ||
| 830 | |||
| 831 | static void free_area(struct logfs_area *area) | ||
| 832 | { | ||
| 833 | if (area) | ||
| 834 | freeseg(area->a_sb, area->a_segno); | ||
| 835 | kfree(area); | ||
| 836 | } | ||
| 837 | |||
| 838 | static struct logfs_area *alloc_area(struct super_block *sb) | ||
| 839 | { | ||
| 840 | struct logfs_area *area; | ||
| 841 | |||
| 842 | area = kzalloc(sizeof(*area), GFP_KERNEL); | ||
| 843 | if (!area) | ||
| 844 | return NULL; | ||
| 845 | |||
| 846 | area->a_sb = sb; | ||
| 847 | return area; | ||
| 848 | } | ||
| 849 | |||
/*
 * ->invalidatepage callback of mapping_aops.  Invalidating a page of
 * this mapping is not supported and is treated as a kernel bug.
 */
static void map_invalidatepage(struct page *page, unsigned long l)
{
	BUG();
}
| 854 | |||
| 855 | static int map_releasepage(struct page *page, gfp_t g) | ||
| 856 | { | ||
| 857 | /* Don't release these pages */ | ||
| 858 | return 0; | ||
| 859 | } | ||
| 860 | |||
| 861 | static const struct address_space_operations mapping_aops = { | ||
| 862 | .invalidatepage = map_invalidatepage, | ||
| 863 | .releasepage = map_releasepage, | ||
| 864 | .set_page_dirty = __set_page_dirty_nobuffers, | ||
| 865 | }; | ||
| 866 | |||
| 867 | int logfs_init_mapping(struct super_block *sb) | ||
| 868 | { | ||
| 869 | struct logfs_super *super = logfs_super(sb); | ||
| 870 | struct address_space *mapping; | ||
| 871 | struct inode *inode; | ||
| 872 | |||
| 873 | inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING); | ||
| 874 | if (IS_ERR(inode)) | ||
| 875 | return PTR_ERR(inode); | ||
| 876 | super->s_mapping_inode = inode; | ||
| 877 | mapping = inode->i_mapping; | ||
| 878 | mapping->a_ops = &mapping_aops; | ||
| 879 | /* Would it be possible to use __GFP_HIGHMEM as well? */ | ||
| 880 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
| 881 | return 0; | ||
| 882 | } | ||
| 883 | |||
| 884 | int logfs_init_areas(struct super_block *sb) | ||
| 885 | { | ||
| 886 | struct logfs_super *super = logfs_super(sb); | ||
| 887 | int i = -1; | ||
| 888 | |||
| 889 | super->s_alias_pool = mempool_create_kmalloc_pool(600, | ||
| 890 | sizeof(struct object_alias_item)); | ||
| 891 | if (!super->s_alias_pool) | ||
| 892 | return -ENOMEM; | ||
| 893 | |||
| 894 | super->s_journal_area = alloc_area(sb); | ||
| 895 | if (!super->s_journal_area) | ||
| 896 | goto err; | ||
| 897 | |||
| 898 | for_each_area(i) { | ||
| 899 | super->s_area[i] = alloc_area(sb); | ||
| 900 | if (!super->s_area[i]) | ||
| 901 | goto err; | ||
| 902 | super->s_area[i]->a_level = GC_LEVEL(i); | ||
| 903 | super->s_area[i]->a_ops = &ostore_area_ops; | ||
| 904 | } | ||
| 905 | btree_init_mempool128(&super->s_object_alias_tree, | ||
| 906 | super->s_btree_pool); | ||
| 907 | return 0; | ||
| 908 | |||
| 909 | err: | ||
| 910 | for (i--; i >= 0; i--) | ||
| 911 | free_area(super->s_area[i]); | ||
| 912 | free_area(super->s_journal_area); | ||
| 913 | mempool_destroy(super->s_alias_pool); | ||
| 914 | return -ENOMEM; | ||
| 915 | } | ||
| 916 | |||
| 917 | void logfs_cleanup_areas(struct super_block *sb) | ||
| 918 | { | ||
| 919 | struct logfs_super *super = logfs_super(sb); | ||
| 920 | int i; | ||
| 921 | |||
| 922 | btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); | ||
| 923 | for_each_area(i) | ||
| 924 | free_area(super->s_area[i]); | ||
| 925 | free_area(super->s_journal_area); | ||
| 926 | destroy_meta_inode(super->s_mapping_inode); | ||
| 927 | } | ||
