about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2007-08-27 16:49:44 -0400
committer David Woodhouse <dwmw2@hera.kernel.org> 2007-08-27 16:49:44 -0400
commit a52d9a8033c454cd9b4697cfafb467509fc1693f (patch)
tree 83d8bbf2df29598ea9c60498fadf5990262a11c8 /fs/btrfs/file.c
parent 83df7c1d8b885e216c68c3f0469bec9b93666b74 (diff)
Btrfs: Extent based page cache code. This uses an rbtree of extents and tests
instead of buffer heads. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- fs/btrfs/file.c 295
1 files changed, 138 insertions, 157 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 6933ab11a5cd..71a481894ab6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -81,14 +81,14 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
81 } 81 }
82} 82}
83 83
84static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, 84static int insert_inline_extent(struct btrfs_trans_handle *trans,
85 struct btrfs_root *root, struct inode *inode,
85 u64 offset, ssize_t size, 86 u64 offset, ssize_t size,
86 struct buffer_head *bh) 87 struct page *page, size_t page_offset)
87{ 88{
88 struct btrfs_key key; 89 struct btrfs_key key;
89 struct btrfs_path *path; 90 struct btrfs_path *path;
90 char *ptr, *kaddr; 91 char *ptr, *kaddr;
91 struct btrfs_trans_handle *trans;
92 struct btrfs_file_extent_item *ei; 92 struct btrfs_file_extent_item *ei;
93 u32 datasize; 93 u32 datasize;
94 int err = 0; 94 int err = 0;
@@ -98,8 +98,6 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode,
98 if (!path) 98 if (!path)
99 return -ENOMEM; 99 return -ENOMEM;
100 100
101 mutex_lock(&root->fs_info->fs_mutex);
102 trans = btrfs_start_transaction(root, 1);
103 btrfs_set_trans_block_group(trans, inode); 101 btrfs_set_trans_block_group(trans, inode);
104 102
105 key.objectid = inode->i_ino; 103 key.objectid = inode->i_ino;
@@ -122,18 +120,13 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode,
122 BTRFS_FILE_EXTENT_INLINE); 120 BTRFS_FILE_EXTENT_INLINE);
123 ptr = btrfs_file_extent_inline_start(ei); 121 ptr = btrfs_file_extent_inline_start(ei);
124 122
125 kaddr = kmap_atomic(bh->b_page, KM_USER0); 123 kaddr = kmap_atomic(page, KM_USER0);
126 btrfs_memcpy(root, path->nodes[0]->b_data, 124 btrfs_memcpy(root, path->nodes[0]->b_data,
127 ptr, kaddr + bh_offset(bh), 125 ptr, kaddr + page_offset, size);
128 size);
129 kunmap_atomic(kaddr, KM_USER0); 126 kunmap_atomic(kaddr, KM_USER0);
130 btrfs_mark_buffer_dirty(path->nodes[0]); 127 btrfs_mark_buffer_dirty(path->nodes[0]);
131fail: 128fail:
132 btrfs_free_path(path); 129 btrfs_free_path(path);
133 ret = btrfs_end_transaction(trans, root);
134 if (ret && !err)
135 err = ret;
136 mutex_unlock(&root->fs_info->fs_mutex);
137 return err; 130 return err;
138} 131}
139 132
@@ -145,45 +138,143 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
145 loff_t pos, 138 loff_t pos,
146 size_t write_bytes) 139 size_t write_bytes)
147{ 140{
148 int i;
149 int offset;
150 int err = 0; 141 int err = 0;
151 int ret; 142 int i;
152 int this_write;
153 struct inode *inode = file->f_path.dentry->d_inode; 143 struct inode *inode = file->f_path.dentry->d_inode;
154 struct buffer_head *bh; 144 struct extent_map *em;
145 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
146 struct btrfs_key ins;
147 u64 hint_block;
148 u64 num_blocks;
149 u64 start_pos;
150 u64 end_of_last_block;
151 u64 end_pos = pos + write_bytes;
152 loff_t isize = i_size_read(inode);
155 153
156 for (i = 0; i < num_pages; i++) { 154 em = alloc_extent_map(GFP_NOFS);
157 offset = pos & (PAGE_CACHE_SIZE -1); 155 if (!em)
158 this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); 156 return -ENOMEM;
159 157
160 /* FIXME, one block at a time */ 158 em->bdev = inode->i_sb->s_bdev;
161 bh = page_buffers(pages[i]);
162 159
163 if (buffer_mapped(bh) && bh->b_blocknr == 0) { 160 start_pos = pos & ~((u64)root->blocksize - 1);
164 ret = insert_inline_extent(root, inode, 161 num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >>
165 pages[i]->index << PAGE_CACHE_SHIFT, 162 inode->i_blkbits;
166 offset + this_write, bh);
167 if (ret) {
168 err = ret;
169 goto failed;
170 }
171 }
172 163
173 ret = btrfs_commit_write(file, pages[i], offset, 164 end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1;
174 offset + this_write); 165 mutex_lock(&root->fs_info->fs_mutex);
175 pos += this_write; 166 trans = btrfs_start_transaction(root, 1);
176 if (ret) { 167 if (!trans) {
177 err = ret; 168 err = -ENOMEM;
169 goto out_unlock;
170 }
171 btrfs_set_trans_block_group(trans, inode);
172 inode->i_blocks += num_blocks << 3;
173 hint_block = 0;
174
175 if ((end_of_last_block & 4095) == 0) {
176 printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block);
177 }
178 set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS);
179
180 /* FIXME...EIEIO, ENOSPC and more */
181
182 /* step one, delete the existing extents in this range */
183 /* FIXME blocksize != pagesize */
184 if (start_pos < inode->i_size) {
185 err = btrfs_drop_extents(trans, root, inode,
186 start_pos, (pos + write_bytes + root->blocksize -1) &
187 ~((u64)root->blocksize - 1), &hint_block);
188 if (err)
189 goto failed;
190 }
191
192 /* insert any holes we need to create */
193 if (inode->i_size < start_pos) {
194 u64 last_pos_in_file;
195 u64 hole_size;
196 u64 mask = root->blocksize - 1;
197 last_pos_in_file = (isize + mask) & ~mask;
198 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
199 hole_size >>= inode->i_blkbits;
200 if (last_pos_in_file < start_pos) {
201 err = btrfs_insert_file_extent(trans, root,
202 inode->i_ino,
203 last_pos_in_file,
204 0, 0, hole_size);
205 }
206 if (err)
178 goto failed; 207 goto failed;
208 }
209
210 /*
211 * either allocate an extent for the new bytes or setup the key
212 * to show we are doing inline data in the extent
213 */
214 if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
215 pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
216 err = btrfs_alloc_extent(trans, root, inode->i_ino,
217 num_blocks, 0, hint_block, (u64)-1,
218 &ins, 1);
219 BUG_ON(err);
220 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
221 start_pos, ins.objectid, ins.offset,
222 ins.offset);
223 BUG_ON(err);
224 em->start = start_pos;
225 em->end = end_of_last_block;
226 em->block_start = ins.objectid << inode->i_blkbits;
227 em->block_end = em->block_start +
228 (ins.offset << inode->i_blkbits) - 1;
229 set_extent_dirty(em_tree, start_pos, end_of_last_block,
230 GFP_NOFS);
231 err = add_extent_mapping(em_tree, em);
232 for (i = 0; i < num_pages; i++) {
233 struct page *p = pages[i];
234 SetPageUptodate(p);
235 __set_page_dirty_nobuffers(p);
179 } 236 }
180 WARN_ON(this_write > write_bytes); 237 } else {
181 write_bytes -= this_write; 238 struct page *p = pages[0];
239 err = insert_inline_extent(trans, root, inode, start_pos,
240 end_pos - start_pos, p, 0);
241 BUG_ON(err);
242 em->start = start_pos;
243 em->end = end_pos;
244 em->block_start = EXTENT_MAP_INLINE;
245 em->block_end = EXTENT_MAP_INLINE;
246 add_extent_mapping(em_tree, em);
247 }
248 if (end_pos > isize) {
249 i_size_write(inode, end_pos);
250 btrfs_update_inode(trans, root, inode);
182 } 251 }
183failed: 252failed:
253 err = btrfs_end_transaction(trans, root);
254out_unlock:
255 mutex_unlock(&root->fs_info->fs_mutex);
256 free_extent_map(em);
184 return err; 257 return err;
185} 258}
186 259
/*
 * Drop the cached extent mappings for a range of an inode.
 *
 * Repeatedly asks lookup_extent_mapping() for a mapping in start..end in the
 * inode's extent map tree; every mapping found is removed from the tree and
 * released twice with free_extent_map().  The loop stops once the lookup
 * returns NULL.  Always returns 0.
 *
 * NOTE(review): end looks inclusive here (btrfs_drop_extents passes
 * end - 1) -- confirm against lookup_extent_mapping().
 */
260int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
261{
262 struct extent_map *em;
263 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
264
265 while(1) {
266 em = lookup_extent_mapping(em_tree, start, end);
267 if (!em)
268 break;
269 remove_extent_mapping(em_tree, em);
270 /* once for us (presumably the reference the lookup handed back -- TODO confirm) */
271 free_extent_map(em);
272 /* once for the tree (presumably the reference the tree held until removal -- TODO confirm) */
273 free_extent_map(em);
274 }
275 return 0;
276}
277
187/* 278/*
188 * this is very complex, but the basic idea is to drop all extents 279 * this is very complex, but the basic idea is to drop all extents
189 * in the range start - end. hint_block is filled in with a block number 280 * in the range start - end. hint_block is filled in with a block number
@@ -213,6 +304,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
213 int found_inline; 304 int found_inline;
214 int recow; 305 int recow;
215 306
307 btrfs_drop_extent_cache(inode, start, end - 1);
308
216 path = btrfs_alloc_path(); 309 path = btrfs_alloc_path();
217 if (!path) 310 if (!path)
218 return -ENOMEM; 311 return -ENOMEM;
@@ -434,18 +527,9 @@ static int prepare_pages(struct btrfs_root *root,
434 int i; 527 int i;
435 unsigned long index = pos >> PAGE_CACHE_SHIFT; 528 unsigned long index = pos >> PAGE_CACHE_SHIFT;
436 struct inode *inode = file->f_path.dentry->d_inode; 529 struct inode *inode = file->f_path.dentry->d_inode;
437 int offset;
438 int err = 0; 530 int err = 0;
439 int this_write;
440 struct buffer_head *bh;
441 struct buffer_head *head;
442 loff_t isize = i_size_read(inode);
443 struct btrfs_trans_handle *trans;
444 u64 hint_block;
445 u64 num_blocks; 531 u64 num_blocks;
446 u64 alloc_extent_start;
447 u64 start_pos; 532 u64 start_pos;
448 struct btrfs_key ins;
449 533
450 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); 534 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
451 num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> 535 num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >>
@@ -457,119 +541,17 @@ static int prepare_pages(struct btrfs_root *root,
457 pages[i] = grab_cache_page(inode->i_mapping, index + i); 541 pages[i] = grab_cache_page(inode->i_mapping, index + i);
458 if (!pages[i]) { 542 if (!pages[i]) {
459 err = -ENOMEM; 543 err = -ENOMEM;
460 goto failed_release; 544 BUG_ON(1);
461 } 545 }
462 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); 546 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
463 wait_on_page_writeback(pages[i]); 547 wait_on_page_writeback(pages[i]);
464 } 548 if (!PagePrivate(pages[i])) {
465 549 SetPagePrivate(pages[i]);
466 mutex_lock(&root->fs_info->fs_mutex); 550 set_page_private(pages[i], 1);
467 trans = btrfs_start_transaction(root, 1); 551 page_cache_get(pages[i]);
468 if (!trans) {
469 err = -ENOMEM;
470 mutex_unlock(&root->fs_info->fs_mutex);
471 goto out_unlock;
472 }
473 btrfs_set_trans_block_group(trans, inode);
474 /* FIXME blocksize != 4096 */
475 inode->i_blocks += num_blocks << 3;
476 hint_block = 0;
477
478 /* FIXME...EIEIO, ENOSPC and more */
479
480 /* step one, delete the existing extents in this range */
481 /* FIXME blocksize != pagesize */
482 if (start_pos < inode->i_size) {
483 err = btrfs_drop_extents(trans, root, inode,
484 start_pos, (pos + write_bytes + root->blocksize -1) &
485 ~((u64)root->blocksize - 1), &hint_block);
486 if (err)
487 goto failed_release;
488 }
489
490 /* insert any holes we need to create */
491 if (inode->i_size < start_pos) {
492 u64 last_pos_in_file;
493 u64 hole_size;
494 u64 mask = root->blocksize - 1;
495 last_pos_in_file = (isize + mask) & ~mask;
496 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
497 hole_size >>= inode->i_blkbits;
498 if (last_pos_in_file < start_pos) {
499 err = btrfs_insert_file_extent(trans, root,
500 inode->i_ino,
501 last_pos_in_file,
502 0, 0, hole_size);
503 }
504 if (err)
505 goto failed_release;
506 }
507
508 /*
509 * either allocate an extent for the new bytes or setup the key
510 * to show we are doing inline data in the extent
511 */
512 if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
513 pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
514 err = btrfs_alloc_extent(trans, root, inode->i_ino,
515 num_blocks, 0, hint_block, (u64)-1,
516 &ins, 1);
517 if (err)
518 goto failed_truncate;
519 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
520 start_pos, ins.objectid, ins.offset,
521 ins.offset);
522 if (err)
523 goto failed_truncate;
524 } else {
525 ins.offset = 0;
526 ins.objectid = 0;
527 }
528 BUG_ON(err);
529 alloc_extent_start = ins.objectid;
530 err = btrfs_end_transaction(trans, root);
531 mutex_unlock(&root->fs_info->fs_mutex);
532
533 for (i = 0; i < num_pages; i++) {
534 offset = pos & (PAGE_CACHE_SIZE -1);
535 this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes);
536 if (!page_has_buffers(pages[i])) {
537 create_empty_buffers(pages[i],
538 root->fs_info->sb->s_blocksize,
539 (1 << BH_Uptodate));
540 } 552 }
541 head = page_buffers(pages[i]);
542 bh = head;
543 do {
544 err = btrfs_map_bh_to_logical(root, bh,
545 alloc_extent_start);
546 BUG_ON(err);
547 if (err)
548 goto failed_truncate;
549 bh = bh->b_this_page;
550 if (alloc_extent_start)
551 alloc_extent_start++;
552 } while (bh != head);
553 pos += this_write;
554 WARN_ON(this_write > write_bytes);
555 write_bytes -= this_write;
556 } 553 }
557 return 0; 554 return 0;
558
559failed_release:
560 btrfs_drop_pages(pages, num_pages);
561 return err;
562
563failed_truncate:
564 btrfs_drop_pages(pages, num_pages);
565 if (pos > isize)
566 vmtruncate(inode, isize);
567 return err;
568
569out_unlock:
570 mutex_unlock(&root->fs_info->fs_mutex);
571 goto failed_release;
572
573} 555}
574 556
575static ssize_t btrfs_file_write(struct file *file, const char __user *buf, 557static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
@@ -685,7 +667,6 @@ out:
685 page_cache_release(pinned[1]); 667 page_cache_release(pinned[1]);
686 *ppos = pos; 668 *ppos = pos;
687 current->backing_dev_info = NULL; 669 current->backing_dev_info = NULL;
688 mark_inode_dirty(inode);
689 return num_written ? num_written : err; 670 return num_written ? num_written : err;
690} 671}
691 672
@@ -714,8 +695,8 @@ static int btrfs_sync_file(struct file *file,
714 mutex_unlock(&root->fs_info->trans_mutex); 695 mutex_unlock(&root->fs_info->trans_mutex);
715 696
716 /* 697 /*
717 * ok we haven't committed the transaction yet, lets do a commit 698 * ok we haven't committed the transaction yet, lets do a commit
718 */ 699 */
719 trans = btrfs_start_transaction(root, 1); 700 trans = btrfs_start_transaction(root, 1);
720 if (!trans) { 701 if (!trans) {
721 ret = -ENOMEM; 702 ret = -ENOMEM;