diff options
author | Chris Mason <chris.mason@oracle.com> | 2007-08-27 16:49:44 -0400 |
---|---|---|
committer | David Woodhouse <dwmw2@hera.kernel.org> | 2007-08-27 16:49:44 -0400 |
commit | a52d9a8033c454cd9b4697cfafb467509fc1693f (patch) | |
tree | 83d8bbf2df29598ea9c60498fadf5990262a11c8 /fs/btrfs/file.c | |
parent | 83df7c1d8b885e216c68c3f0469bec9b93666b74 (diff) |
Btrfs: Extent based page cache code. This uses an rbtree of extents and tests
instead of buffer heads.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 295 |
1 files changed, 138 insertions, 157 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6933ab11a5cd..71a481894ab6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -81,14 +81,14 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
81 | } | 81 | } |
82 | } | 82 | } |
83 | 83 | ||
84 | static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, | 84 | static int insert_inline_extent(struct btrfs_trans_handle *trans, |
85 | struct btrfs_root *root, struct inode *inode, | ||
85 | u64 offset, ssize_t size, | 86 | u64 offset, ssize_t size, |
86 | struct buffer_head *bh) | 87 | struct page *page, size_t page_offset) |
87 | { | 88 | { |
88 | struct btrfs_key key; | 89 | struct btrfs_key key; |
89 | struct btrfs_path *path; | 90 | struct btrfs_path *path; |
90 | char *ptr, *kaddr; | 91 | char *ptr, *kaddr; |
91 | struct btrfs_trans_handle *trans; | ||
92 | struct btrfs_file_extent_item *ei; | 92 | struct btrfs_file_extent_item *ei; |
93 | u32 datasize; | 93 | u32 datasize; |
94 | int err = 0; | 94 | int err = 0; |
@@ -98,8 +98,6 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, | |||
98 | if (!path) | 98 | if (!path) |
99 | return -ENOMEM; | 99 | return -ENOMEM; |
100 | 100 | ||
101 | mutex_lock(&root->fs_info->fs_mutex); | ||
102 | trans = btrfs_start_transaction(root, 1); | ||
103 | btrfs_set_trans_block_group(trans, inode); | 101 | btrfs_set_trans_block_group(trans, inode); |
104 | 102 | ||
105 | key.objectid = inode->i_ino; | 103 | key.objectid = inode->i_ino; |
@@ -122,18 +120,13 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, | |||
122 | BTRFS_FILE_EXTENT_INLINE); | 120 | BTRFS_FILE_EXTENT_INLINE); |
123 | ptr = btrfs_file_extent_inline_start(ei); | 121 | ptr = btrfs_file_extent_inline_start(ei); |
124 | 122 | ||
125 | kaddr = kmap_atomic(bh->b_page, KM_USER0); | 123 | kaddr = kmap_atomic(page, KM_USER0); |
126 | btrfs_memcpy(root, path->nodes[0]->b_data, | 124 | btrfs_memcpy(root, path->nodes[0]->b_data, |
127 | ptr, kaddr + bh_offset(bh), | 125 | ptr, kaddr + page_offset, size); |
128 | size); | ||
129 | kunmap_atomic(kaddr, KM_USER0); | 126 | kunmap_atomic(kaddr, KM_USER0); |
130 | btrfs_mark_buffer_dirty(path->nodes[0]); | 127 | btrfs_mark_buffer_dirty(path->nodes[0]); |
131 | fail: | 128 | fail: |
132 | btrfs_free_path(path); | 129 | btrfs_free_path(path); |
133 | ret = btrfs_end_transaction(trans, root); | ||
134 | if (ret && !err) | ||
135 | err = ret; | ||
136 | mutex_unlock(&root->fs_info->fs_mutex); | ||
137 | return err; | 130 | return err; |
138 | } | 131 | } |
139 | 132 | ||
@@ -145,45 +138,143 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
145 | loff_t pos, | 138 | loff_t pos, |
146 | size_t write_bytes) | 139 | size_t write_bytes) |
147 | { | 140 | { |
148 | int i; | ||
149 | int offset; | ||
150 | int err = 0; | 141 | int err = 0; |
151 | int ret; | 142 | int i; |
152 | int this_write; | ||
153 | struct inode *inode = file->f_path.dentry->d_inode; | 143 | struct inode *inode = file->f_path.dentry->d_inode; |
154 | struct buffer_head *bh; | 144 | struct extent_map *em; |
145 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
146 | struct btrfs_key ins; | ||
147 | u64 hint_block; | ||
148 | u64 num_blocks; | ||
149 | u64 start_pos; | ||
150 | u64 end_of_last_block; | ||
151 | u64 end_pos = pos + write_bytes; | ||
152 | loff_t isize = i_size_read(inode); | ||
155 | 153 | ||
156 | for (i = 0; i < num_pages; i++) { | 154 | em = alloc_extent_map(GFP_NOFS); |
157 | offset = pos & (PAGE_CACHE_SIZE -1); | 155 | if (!em) |
158 | this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); | 156 | return -ENOMEM; |
159 | 157 | ||
160 | /* FIXME, one block at a time */ | 158 | em->bdev = inode->i_sb->s_bdev; |
161 | bh = page_buffers(pages[i]); | ||
162 | 159 | ||
163 | if (buffer_mapped(bh) && bh->b_blocknr == 0) { | 160 | start_pos = pos & ~((u64)root->blocksize - 1); |
164 | ret = insert_inline_extent(root, inode, | 161 | num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> |
165 | pages[i]->index << PAGE_CACHE_SHIFT, | 162 | inode->i_blkbits; |
166 | offset + this_write, bh); | ||
167 | if (ret) { | ||
168 | err = ret; | ||
169 | goto failed; | ||
170 | } | ||
171 | } | ||
172 | 163 | ||
173 | ret = btrfs_commit_write(file, pages[i], offset, | 164 | end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; |
174 | offset + this_write); | 165 | mutex_lock(&root->fs_info->fs_mutex); |
175 | pos += this_write; | 166 | trans = btrfs_start_transaction(root, 1); |
176 | if (ret) { | 167 | if (!trans) { |
177 | err = ret; | 168 | err = -ENOMEM; |
169 | goto out_unlock; | ||
170 | } | ||
171 | btrfs_set_trans_block_group(trans, inode); | ||
172 | inode->i_blocks += num_blocks << 3; | ||
173 | hint_block = 0; | ||
174 | |||
175 | if ((end_of_last_block & 4095) == 0) { | ||
176 | printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block); | ||
177 | } | ||
178 | set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
179 | |||
180 | /* FIXME...EIEIO, ENOSPC and more */ | ||
181 | |||
182 | /* step one, delete the existing extents in this range */ | ||
183 | /* FIXME blocksize != pagesize */ | ||
184 | if (start_pos < inode->i_size) { | ||
185 | err = btrfs_drop_extents(trans, root, inode, | ||
186 | start_pos, (pos + write_bytes + root->blocksize -1) & | ||
187 | ~((u64)root->blocksize - 1), &hint_block); | ||
188 | if (err) | ||
189 | goto failed; | ||
190 | } | ||
191 | |||
192 | /* insert any holes we need to create */ | ||
193 | if (inode->i_size < start_pos) { | ||
194 | u64 last_pos_in_file; | ||
195 | u64 hole_size; | ||
196 | u64 mask = root->blocksize - 1; | ||
197 | last_pos_in_file = (isize + mask) & ~mask; | ||
198 | hole_size = (start_pos - last_pos_in_file + mask) & ~mask; | ||
199 | hole_size >>= inode->i_blkbits; | ||
200 | if (last_pos_in_file < start_pos) { | ||
201 | err = btrfs_insert_file_extent(trans, root, | ||
202 | inode->i_ino, | ||
203 | last_pos_in_file, | ||
204 | 0, 0, hole_size); | ||
205 | } | ||
206 | if (err) | ||
178 | goto failed; | 207 | goto failed; |
208 | } | ||
209 | |||
210 | /* | ||
211 | * either allocate an extent for the new bytes or setup the key | ||
212 | * to show we are doing inline data in the extent | ||
213 | */ | ||
214 | if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || | ||
215 | pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { | ||
216 | err = btrfs_alloc_extent(trans, root, inode->i_ino, | ||
217 | num_blocks, 0, hint_block, (u64)-1, | ||
218 | &ins, 1); | ||
219 | BUG_ON(err); | ||
220 | err = btrfs_insert_file_extent(trans, root, inode->i_ino, | ||
221 | start_pos, ins.objectid, ins.offset, | ||
222 | ins.offset); | ||
223 | BUG_ON(err); | ||
224 | em->start = start_pos; | ||
225 | em->end = end_of_last_block; | ||
226 | em->block_start = ins.objectid << inode->i_blkbits; | ||
227 | em->block_end = em->block_start + | ||
228 | (ins.offset << inode->i_blkbits) - 1; | ||
229 | set_extent_dirty(em_tree, start_pos, end_of_last_block, | ||
230 | GFP_NOFS); | ||
231 | err = add_extent_mapping(em_tree, em); | ||
232 | for (i = 0; i < num_pages; i++) { | ||
233 | struct page *p = pages[i]; | ||
234 | SetPageUptodate(p); | ||
235 | __set_page_dirty_nobuffers(p); | ||
179 | } | 236 | } |
180 | WARN_ON(this_write > write_bytes); | 237 | } else { |
181 | write_bytes -= this_write; | 238 | struct page *p = pages[0]; |
239 | err = insert_inline_extent(trans, root, inode, start_pos, | ||
240 | end_pos - start_pos, p, 0); | ||
241 | BUG_ON(err); | ||
242 | em->start = start_pos; | ||
243 | em->end = end_pos; | ||
244 | em->block_start = EXTENT_MAP_INLINE; | ||
245 | em->block_end = EXTENT_MAP_INLINE; | ||
246 | add_extent_mapping(em_tree, em); | ||
247 | } | ||
248 | if (end_pos > isize) { | ||
249 | i_size_write(inode, end_pos); | ||
250 | btrfs_update_inode(trans, root, inode); | ||
182 | } | 251 | } |
183 | failed: | 252 | failed: |
253 | err = btrfs_end_transaction(trans, root); | ||
254 | out_unlock: | ||
255 | mutex_unlock(&root->fs_info->fs_mutex); | ||
256 | free_extent_map(em); | ||
184 | return err; | 257 | return err; |
185 | } | 258 | } |
186 | 259 | ||
260 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) | ||
261 | { | ||
262 | struct extent_map *em; | ||
263 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
264 | |||
265 | while(1) { | ||
266 | em = lookup_extent_mapping(em_tree, start, end); | ||
267 | if (!em) | ||
268 | break; | ||
269 | remove_extent_mapping(em_tree, em); | ||
270 | /* once for us */ | ||
271 | free_extent_map(em); | ||
272 | /* once for the tree*/ | ||
273 | free_extent_map(em); | ||
274 | } | ||
275 | return 0; | ||
276 | } | ||
277 | |||
187 | /* | 278 | /* |
188 | * this is very complex, but the basic idea is to drop all extents | 279 | * this is very complex, but the basic idea is to drop all extents |
189 | * in the range start - end. hint_block is filled in with a block number | 280 | * in the range start - end. hint_block is filled in with a block number |
@@ -213,6 +304,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
213 | int found_inline; | 304 | int found_inline; |
214 | int recow; | 305 | int recow; |
215 | 306 | ||
307 | btrfs_drop_extent_cache(inode, start, end - 1); | ||
308 | |||
216 | path = btrfs_alloc_path(); | 309 | path = btrfs_alloc_path(); |
217 | if (!path) | 310 | if (!path) |
218 | return -ENOMEM; | 311 | return -ENOMEM; |
@@ -434,18 +527,9 @@ static int prepare_pages(struct btrfs_root *root, | |||
434 | int i; | 527 | int i; |
435 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 528 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
436 | struct inode *inode = file->f_path.dentry->d_inode; | 529 | struct inode *inode = file->f_path.dentry->d_inode; |
437 | int offset; | ||
438 | int err = 0; | 530 | int err = 0; |
439 | int this_write; | ||
440 | struct buffer_head *bh; | ||
441 | struct buffer_head *head; | ||
442 | loff_t isize = i_size_read(inode); | ||
443 | struct btrfs_trans_handle *trans; | ||
444 | u64 hint_block; | ||
445 | u64 num_blocks; | 531 | u64 num_blocks; |
446 | u64 alloc_extent_start; | ||
447 | u64 start_pos; | 532 | u64 start_pos; |
448 | struct btrfs_key ins; | ||
449 | 533 | ||
450 | start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); | 534 | start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); |
451 | num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> | 535 | num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> |
@@ -457,119 +541,17 @@ static int prepare_pages(struct btrfs_root *root, | |||
457 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 541 | pages[i] = grab_cache_page(inode->i_mapping, index + i); |
458 | if (!pages[i]) { | 542 | if (!pages[i]) { |
459 | err = -ENOMEM; | 543 | err = -ENOMEM; |
460 | goto failed_release; | 544 | BUG_ON(1); |
461 | } | 545 | } |
462 | cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); | 546 | cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); |
463 | wait_on_page_writeback(pages[i]); | 547 | wait_on_page_writeback(pages[i]); |
464 | } | 548 | if (!PagePrivate(pages[i])) { |
465 | 549 | SetPagePrivate(pages[i]); | |
466 | mutex_lock(&root->fs_info->fs_mutex); | 550 | set_page_private(pages[i], 1); |
467 | trans = btrfs_start_transaction(root, 1); | 551 | page_cache_get(pages[i]); |
468 | if (!trans) { | ||
469 | err = -ENOMEM; | ||
470 | mutex_unlock(&root->fs_info->fs_mutex); | ||
471 | goto out_unlock; | ||
472 | } | ||
473 | btrfs_set_trans_block_group(trans, inode); | ||
474 | /* FIXME blocksize != 4096 */ | ||
475 | inode->i_blocks += num_blocks << 3; | ||
476 | hint_block = 0; | ||
477 | |||
478 | /* FIXME...EIEIO, ENOSPC and more */ | ||
479 | |||
480 | /* step one, delete the existing extents in this range */ | ||
481 | /* FIXME blocksize != pagesize */ | ||
482 | if (start_pos < inode->i_size) { | ||
483 | err = btrfs_drop_extents(trans, root, inode, | ||
484 | start_pos, (pos + write_bytes + root->blocksize -1) & | ||
485 | ~((u64)root->blocksize - 1), &hint_block); | ||
486 | if (err) | ||
487 | goto failed_release; | ||
488 | } | ||
489 | |||
490 | /* insert any holes we need to create */ | ||
491 | if (inode->i_size < start_pos) { | ||
492 | u64 last_pos_in_file; | ||
493 | u64 hole_size; | ||
494 | u64 mask = root->blocksize - 1; | ||
495 | last_pos_in_file = (isize + mask) & ~mask; | ||
496 | hole_size = (start_pos - last_pos_in_file + mask) & ~mask; | ||
497 | hole_size >>= inode->i_blkbits; | ||
498 | if (last_pos_in_file < start_pos) { | ||
499 | err = btrfs_insert_file_extent(trans, root, | ||
500 | inode->i_ino, | ||
501 | last_pos_in_file, | ||
502 | 0, 0, hole_size); | ||
503 | } | ||
504 | if (err) | ||
505 | goto failed_release; | ||
506 | } | ||
507 | |||
508 | /* | ||
509 | * either allocate an extent for the new bytes or setup the key | ||
510 | * to show we are doing inline data in the extent | ||
511 | */ | ||
512 | if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || | ||
513 | pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { | ||
514 | err = btrfs_alloc_extent(trans, root, inode->i_ino, | ||
515 | num_blocks, 0, hint_block, (u64)-1, | ||
516 | &ins, 1); | ||
517 | if (err) | ||
518 | goto failed_truncate; | ||
519 | err = btrfs_insert_file_extent(trans, root, inode->i_ino, | ||
520 | start_pos, ins.objectid, ins.offset, | ||
521 | ins.offset); | ||
522 | if (err) | ||
523 | goto failed_truncate; | ||
524 | } else { | ||
525 | ins.offset = 0; | ||
526 | ins.objectid = 0; | ||
527 | } | ||
528 | BUG_ON(err); | ||
529 | alloc_extent_start = ins.objectid; | ||
530 | err = btrfs_end_transaction(trans, root); | ||
531 | mutex_unlock(&root->fs_info->fs_mutex); | ||
532 | |||
533 | for (i = 0; i < num_pages; i++) { | ||
534 | offset = pos & (PAGE_CACHE_SIZE -1); | ||
535 | this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); | ||
536 | if (!page_has_buffers(pages[i])) { | ||
537 | create_empty_buffers(pages[i], | ||
538 | root->fs_info->sb->s_blocksize, | ||
539 | (1 << BH_Uptodate)); | ||
540 | } | 552 | } |
541 | head = page_buffers(pages[i]); | ||
542 | bh = head; | ||
543 | do { | ||
544 | err = btrfs_map_bh_to_logical(root, bh, | ||
545 | alloc_extent_start); | ||
546 | BUG_ON(err); | ||
547 | if (err) | ||
548 | goto failed_truncate; | ||
549 | bh = bh->b_this_page; | ||
550 | if (alloc_extent_start) | ||
551 | alloc_extent_start++; | ||
552 | } while (bh != head); | ||
553 | pos += this_write; | ||
554 | WARN_ON(this_write > write_bytes); | ||
555 | write_bytes -= this_write; | ||
556 | } | 553 | } |
557 | return 0; | 554 | return 0; |
558 | |||
559 | failed_release: | ||
560 | btrfs_drop_pages(pages, num_pages); | ||
561 | return err; | ||
562 | |||
563 | failed_truncate: | ||
564 | btrfs_drop_pages(pages, num_pages); | ||
565 | if (pos > isize) | ||
566 | vmtruncate(inode, isize); | ||
567 | return err; | ||
568 | |||
569 | out_unlock: | ||
570 | mutex_unlock(&root->fs_info->fs_mutex); | ||
571 | goto failed_release; | ||
572 | |||
573 | } | 555 | } |
574 | 556 | ||
575 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 557 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, |
@@ -685,7 +667,6 @@ out: | |||
685 | page_cache_release(pinned[1]); | 667 | page_cache_release(pinned[1]); |
686 | *ppos = pos; | 668 | *ppos = pos; |
687 | current->backing_dev_info = NULL; | 669 | current->backing_dev_info = NULL; |
688 | mark_inode_dirty(inode); | ||
689 | return num_written ? num_written : err; | 670 | return num_written ? num_written : err; |
690 | } | 671 | } |
691 | 672 | ||
@@ -714,8 +695,8 @@ static int btrfs_sync_file(struct file *file, | |||
714 | mutex_unlock(&root->fs_info->trans_mutex); | 695 | mutex_unlock(&root->fs_info->trans_mutex); |
715 | 696 | ||
716 | /* | 697 | /* |
717 | * ok we haven't committed the transaction yet, lets do a commit | 698 | * ok we haven't committed the transaction yet, lets do a commit |
718 | */ | 699 | */ |
719 | trans = btrfs_start_transaction(root, 1); | 700 | trans = btrfs_start_transaction(root, 1); |
720 | if (!trans) { | 701 | if (!trans) { |
721 | ret = -ENOMEM; | 702 | ret = -ENOMEM; |