From 39279cc3d2704cfbf9c35dcb5bdd392159ae4625 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 06:35:45 -0400 Subject: Btrfs: split up super.c Signed-off-by: Chris Mason --- fs/btrfs/file.c | 783 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 783 insertions(+) create mode 100644 fs/btrfs/file.c (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c new file mode 100644 index 000000000000..7f8e3035d99e --- /dev/null +++ b/fs/btrfs/file.c @@ -0,0 +1,783 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "btrfs_inode.h" +#include "ioctl.h" +#include "print-tree.h" + + +static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, + struct page **prepared_pages, + const char __user * buf) +{ + long page_fault = 0; + int i; + int offset = pos & (PAGE_CACHE_SIZE - 1); + + for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { + size_t count = min_t(size_t, + PAGE_CACHE_SIZE - offset, write_bytes); + struct page *page = prepared_pages[i]; + fault_in_pages_readable(buf, count); + + /* Copy data from userspace to the current page */ + kmap(page); + page_fault = __copy_from_user(page_address(page) + offset, + buf, count); + /* Flush processor's dcache for this page */ + flush_dcache_page(page); + kunmap(page); + buf += count; + write_bytes -= count; + + if (page_fault) + break; + } + return page_fault ? -EFAULT : 0; +} + +static void btrfs_drop_pages(struct page **pages, size_t num_pages) +{ + size_t i; + for (i = 0; i < num_pages; i++) { + if (!pages[i]) + break; + unlock_page(pages[i]); + mark_page_accessed(pages[i]); + page_cache_release(pages[i]); + } +} + +static int dirty_and_release_pages(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + size_t write_bytes) +{ + int i; + int offset; + int err = 0; + int ret; + int this_write; + struct inode *inode = file->f_path.dentry->d_inode; + struct buffer_head *bh; + struct btrfs_file_extent_item *ei; + + for (i = 0; i < num_pages; i++) { + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + /* FIXME, one block at a time */ + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + bh = page_buffers(pages[i]); + + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + struct btrfs_key key; + struct btrfs_path *path; + char *ptr; + u32 datasize; + + /* create an inline extent, and copy the data in */ + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = pages[i]->index << PAGE_CACHE_SHIFT; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(write_bytes >= PAGE_CACHE_SIZE); + datasize = offset + + btrfs_file_extent_calc_inline_size(write_bytes); + + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(ret); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, bh->b_data, offset + write_bytes); + mark_buffer_dirty(path->nodes[0]); + 
btrfs_free_path(path); + } else if (buffer_mapped(bh)) { + /* csum the file data */ + btrfs_csum_file_block(trans, root, inode->i_ino, + pages[i]->index << PAGE_CACHE_SHIFT, + kmap(pages[i]), PAGE_CACHE_SIZE); + kunmap(pages[i]); + } + SetPageChecked(pages[i]); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + + ret = btrfs_commit_write(file, pages[i], offset, + offset + this_write); + pos += this_write; + if (ret) { + err = ret; + goto failed; + } + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } +failed: + return err; +} + +/* + * this is very complex, but the basic idea is to drop all extents + * in the range start - end. hint_block is filled in with a block number + * that would be a good hint to the block allocator for this file. + * + * If an extent intersects the range but is not entirely inside the range + * it is either truncated or split. Anything entirely inside the range + * is deleted from the tree. + */ +int btrfs_drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + u64 start, u64 end, u64 *hint_block) +{ + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int slot; + struct btrfs_file_extent_item *extent; + u64 extent_end = 0; + int keep; + struct btrfs_file_extent_item old; + struct btrfs_path *path; + u64 search_start = start; + int bookend; + int found_type; + int found_extent; + int found_inline; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + while(1) { + btrfs_release_path(root, path); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = 0; + goto out; + } + path->slots[0]--; + } + keep = 0; + bookend = 0; + found_extent = 0; + found_inline = 0; + extent = NULL; + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.offset >= end || key.objectid != inode->i_ino) { + ret = 0; + goto out; + } + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { + ret = 0; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + slot); + } + + /* we found nothing we can drop */ + if (!found_extent && !found_inline) { + ret = 0; + goto out; + } + + /* we found nothing inside the range */ + if (search_start >= extent_end) { + ret = 0; + goto out; + } + + /* FIXME, there's only one inline extent allowed right now */ + if (found_inline) { + u64 mask = root->blocksize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; + + if (end < extent_end && end >= key.offset) { + if (found_extent) { + u64 disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + u64 disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + memcpy(&old, extent, sizeof(old)); + if (disk_blocknr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_blocknr, disk_num_blocks); + BUG_ON(ret); + } + } + WARN_ON(found_inline); + bookend = 1; + } + + /* truncate existing extent */ + if (start > key.offset) { + u64 new_num; + u64 old_num; + keep = 1; + WARN_ON(start & 
(root->blocksize - 1)); + if (found_extent) { + new_num = (start - key.offset) >> + inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); + if (btrfs_file_extent_disk_blocknr(extent)) { + inode->i_blocks -= + (old_num - new_num) << 3; + } + btrfs_set_file_extent_num_blocks(extent, + new_num); + mark_buffer_dirty(path->nodes[0]); + } else { + WARN_ON(1); + } + } + /* delete the entire extent */ + if (!keep) { + u64 disk_blocknr = 0; + u64 disk_num_blocks = 0; + u64 extent_num_blocks = 0; + if (found_extent) { + disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + extent_num_blocks = + btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + extent = NULL; + if (found_extent && disk_blocknr != 0) { + inode->i_blocks -= extent_num_blocks << 3; + ret = btrfs_free_extent(trans, root, + disk_blocknr, + disk_num_blocks, 0); + } + + BUG_ON(ret); + if (!bookend && search_start >= end) { + ret = 0; + goto out; + } + if (!bookend) + continue; + } + /* create bookend, splitting the extent in two */ + if (bookend && found_extent) { + struct btrfs_key ins; + ins.objectid = inode->i_ino; + ins.offset = end; + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent)); + BUG_ON(ret); + extent = btrfs_item_ptr( + btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(extent, + btrfs_file_extent_disk_blocknr(&old)); + btrfs_set_file_extent_disk_num_blocks(extent, + btrfs_file_extent_disk_num_blocks(&old)); + + btrfs_set_file_extent_offset(extent, + btrfs_file_extent_offset(&old) + + ((end - key.offset) >> inode->i_blkbits)); + WARN_ON(btrfs_file_extent_num_blocks(&old) < + (extent_end - end) >> inode->i_blkbits); + btrfs_set_file_extent_num_blocks(extent, + (extent_end - end) >> inode->i_blkbits); + + btrfs_set_file_extent_type(extent, + BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_generation(extent, + btrfs_file_extent_generation(&old)); + btrfs_mark_buffer_dirty(path->nodes[0]); + if (btrfs_file_extent_disk_blocknr(&old) != 0) { + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; + } + ret = 0; + goto out; + } + } +out: + btrfs_free_path(path); + return ret; +} + +/* + * this gets pages into the page cache and locks them down + */ +static int prepare_pages(struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + unsigned long first_index, + unsigned long last_index, + size_t write_bytes, + u64 alloc_extent_start) +{ + int i; + unsigned long index = pos >> PAGE_CACHE_SHIFT; + struct inode *inode = file->f_path.dentry->d_inode; + int offset; + int err = 0; + int this_write; + struct buffer_head *bh; + struct buffer_head *head; + loff_t isize = i_size_read(inode); + + memset(pages, 0, num_pages * sizeof(struct page *)); + + for (i = 0; i < num_pages; i++) { + pages[i] = grab_cache_page(inode->i_mapping, index + i); + if (!pages[i]) { + err = -ENOMEM; + goto failed_release; + } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + if 
(!page_has_buffers(pages[i])) { + create_empty_buffers(pages[i], + root->fs_info->sb->s_blocksize, + (1 << BH_Uptodate)); + } + head = page_buffers(pages[i]); + bh = head; + do { + err = btrfs_map_bh_to_logical(root, bh, + alloc_extent_start); + BUG_ON(err); + if (err) + goto failed_truncate; + bh = bh->b_this_page; + if (alloc_extent_start) + alloc_extent_start++; + } while (bh != head); + pos += this_write; + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } + return 0; + +failed_release: + btrfs_drop_pages(pages, num_pages); + return err; + +failed_truncate: + btrfs_drop_pages(pages, num_pages); + if (pos > isize) + vmtruncate(inode, isize); + return err; +} + +static ssize_t btrfs_file_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos; + size_t num_written = 0; + int err = 0; + int ret = 0; + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct page *pages[8]; + struct page *pinned[2]; + unsigned long first_index; + unsigned long last_index; + u64 start_pos; + u64 num_blocks; + u64 alloc_extent_start; + u64 hint_block; + struct btrfs_trans_handle *trans; + struct btrfs_key ins; + pinned[0] = NULL; + pinned[1] = NULL; + if (file->f_flags & O_DIRECT) + return -EINVAL; + pos = *ppos; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + current->backing_dev_info = inode->i_mapping->backing_dev_info; + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + if (count == 0) + goto out; + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + file_update_time(file); + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (count + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; + + mutex_lock(&inode->i_mutex); + first_index = pos >> PAGE_CACHE_SHIFT; + last_index = (pos + count) >> PAGE_CACHE_SHIFT; + + /* + * there are lots of better ways to do this, but this code + * makes sure the first and last page in the file range are + * up to date and ready for cow + */ + if ((pos & (PAGE_CACHE_SIZE - 1))) { + pinned[0] = grab_cache_page(inode->i_mapping, first_index); + if (!PageUptodate(pinned[0])) { + ret = mpage_readpage(pinned[0], btrfs_get_block); + BUG_ON(ret); + wait_on_page_locked(pinned[0]); + } else { + unlock_page(pinned[0]); + } + } + if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { + pinned[1] = grab_cache_page(inode->i_mapping, last_index); + if (!PageUptodate(pinned[1])) { + ret = mpage_readpage(pinned[1], btrfs_get_block); + BUG_ON(ret); + wait_on_page_locked(pinned[1]); + } else { + unlock_page(pinned[1]); + } + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + if (start_pos < inode->i_size) { + /* FIXME blocksize != pagesize */ + ret = btrfs_drop_extents(trans, root, inode, + start_pos, + (pos + count + root->blocksize -1) & + ~((u64)root->blocksize - 1), + &hint_block); + BUG_ON(ret); + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (inode->i_size + mask) & ~mask; + hole_size = (start_pos - 
last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + ret = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(ret); + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || + pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, hint_block, (u64)-1, + &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(ret); + } else { + ins.offset = 0; + ins.objectid = 0; + } + BUG_ON(ret); + alloc_extent_start = ins.objectid; + ret = btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + while(count > 0) { + size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + + memset(pages, 0, sizeof(pages)); + ret = prepare_pages(root, file, pages, num_pages, + pos, first_index, last_index, + write_bytes, alloc_extent_start); + BUG_ON(ret); + + /* FIXME blocks != pagesize */ + if (alloc_extent_start) + alloc_extent_start += num_pages; + ret = btrfs_copy_from_user(pos, num_pages, + write_bytes, pages, buf); + BUG_ON(ret); + + ret = dirty_and_release_pages(NULL, root, file, pages, + num_pages, pos, write_bytes); + BUG_ON(ret); + btrfs_drop_pages(pages, num_pages); + + buf += write_bytes; + count -= write_bytes; + pos += write_bytes; + num_written += write_bytes; + + balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_btree_balance_dirty(root); + cond_resched(); + } +out_unlock: + mutex_unlock(&inode->i_mutex); +out: + if (pinned[0]) + page_cache_release(pinned[0]); + if (pinned[1]) + page_cache_release(pinned[1]); + *ppos = pos; + current->backing_dev_info = NULL; + mark_inode_dirty(inode); + return num_written ? num_written : err; +} + +/* + * FIXME, do this by stuffing the csum we want in the info hanging off + * page->private. For now, verify file csums on read + */ +static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) +{ + char *kaddr; + unsigned long left, count = desc->count; + struct inode *inode = page->mapping->host; + + if (size > count) + size = count; + + if (!PageChecked(page)) { + /* FIXME, do it per block */ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct buffer_head *bh; + + if (page_has_buffers(page)) { + bh = page_buffers(page); + if (!buffer_mapped(bh)) { + SetPageChecked(page); + goto checked; + } + } + + ret = btrfs_csum_verify_file_block(root, + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); + if (ret) { + if (ret != -ENOENT) { + printk("failed to verify ino %lu page %lu ret %d\n", + page->mapping->host->i_ino, + page->index, ret); + memset(page_address(page), 1, PAGE_CACHE_SIZE); + flush_dcache_page(page); + } + } + SetPageChecked(page); + kunmap(page); + } +checked: + /* + * Faults on the destination of a read are common, so do it before + * taking the kmap. 
+ */ + if (!fault_in_pages_writeable(desc->arg.buf, size)) { + kaddr = kmap_atomic(page, KM_USER0); + left = __copy_to_user_inatomic(desc->arg.buf, + kaddr + offset, size); + kunmap_atomic(kaddr, KM_USER0); + if (left == 0) + goto success; + } + + /* Do it the slow way */ + kaddr = kmap(page); + left = __copy_to_user(desc->arg.buf, kaddr + offset, size); + kunmap(page); + + if (left) { + size -= left; + desc->error = -EFAULT; + } +success: + desc->count = count - size; + desc->written += size; + desc->arg.buf += size; + return size; +} + +/** + * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify + * @iocb: kernel I/O control block + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @pos: current file position + */ +static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *filp = iocb->ki_filp; + ssize_t retval; + unsigned long seg; + size_t count; + loff_t *ppos = &iocb->ki_pos; + + count = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + count += iv->iov_len; + if (unlikely((ssize_t)(count|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + nr_segs = seg; + count -= iv->iov_len; /* This segment is no good */ + break; + } + retval = 0; + if (count) { + for (seg = 0; seg < nr_segs; seg++) { + read_descriptor_t desc; + + desc.written = 0; + desc.arg.buf = iov[seg].iov_base; + desc.count = iov[seg].iov_len; + if (desc.count == 0) + continue; + desc.error = 0; + do_generic_file_read(filp, ppos, &desc, + btrfs_read_actor); + retval += desc.written; + if (desc.error) { + retval = retval ?: desc.error; + break; + } + } + } + return retval; +} + +static int btrfs_sync_file(struct file *file, + struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct btrfs_trans_handle *trans; + + /* + * FIXME, use inode generation number to check if we can skip the + * commit + */ + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); +out: + return ret > 0 ? 
EIO : ret; +} + +struct file_operations btrfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .aio_read = btrfs_file_aio_read, + .write = btrfs_file_write, + .mmap = generic_file_mmap, + .open = generic_file_open, + .ioctl = btrfs_ioctl, + .fsync = btrfs_sync_file, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif +}; + -- cgit v1.2.2 From 84f54cfa78c81991e087309a9b379f25f1ffdb10 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 07:43:08 -0400 Subject: Btrfs: 64 bit div fixes Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7f8e3035d99e..0325dc038592 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -82,7 +82,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); /* FIXME, one block at a time */ mutex_lock(&root->fs_info->fs_mutex); @@ -395,7 +395,7 @@ static int prepare_pages(struct btrfs_root *root, cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); if (!page_has_buffers(pages[i])) { create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, @@ -567,7 +567,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t write_bytes = min(count, + (size_t)PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -- cgit v1.2.2 From 6cbd55707802b98843f953d1ae6d8f5bcd9a76c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 09:07:21 -0400 Subject: Btrfs: add GPLv2 Signed-off-by: Chris Mason --- fs/btrfs/file.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0325dc038592..566e526b60ff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + #include #include #include -- cgit v1.2.2 From 340887809d92a4dd6c71caa97d658cef32c098c3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 11:36:58 -0400 Subject: Btrfs: i386 fixes from axboe Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 566e526b60ff..83836fb3d4e9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -112,7 +112,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (buffer_mapped(bh) && bh->b_blocknr == 0) { struct btrfs_key key; struct btrfs_path *path; - char *ptr; + char *ptr, *kaddr; u32 datasize; /* create an inline extent, and copy the data in */ @@ -135,8 +135,11 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); + kaddr = kmap_atomic(bh->b_page, KM_USER0); btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, bh->b_data, offset + write_bytes); + ptr, kaddr + bh_offset(bh), + offset + write_bytes); + kunmap_atomic(kaddr, KM_USER0); mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); } else if (buffer_mapped(bh)) { -- cgit v1.2.2 From 9ebefb180bad4914a31c4e1748ba187a30e1e990 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Jun 2007 13:50:00 -0400 Subject: Btrfs: patch queue: page_mkwrite Signed-off-by: Chris Mason --- fs/btrfs/file.c | 176 +++++++++----------------------------------------------- 1 file changed, 26 insertions(+), 150 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 83836fb3d4e9..de8d47b44e12 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -103,10 +103,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); /* FIXME, one block at a time */ - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { @@ -115,6 +111,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, char *ptr, *kaddr; u32 datasize; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + /* create an inline extent, and copy the data in */ path = btrfs_alloc_path(); BUG_ON(!path); @@ -135,24 +135,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); + kaddr = kmap_atomic(bh->b_page, KM_USER0); btrfs_memcpy(root, path->nodes[0]->b_data, ptr, kaddr + bh_offset(bh), offset + write_bytes); kunmap_atomic(kaddr, KM_USER0); + mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - } else if (buffer_mapped(bh)) { - /* csum the file data */ - btrfs_csum_file_block(trans, root, inode->i_ino, - pages[i]->index << PAGE_CACHE_SHIFT, - kmap(pages[i]), PAGE_CACHE_SIZE); - kunmap(pages[i]); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); } - SetPageChecked(pages[i]); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_commit_write(file, pages[i], offset, offset + this_write); @@ -503,7 +498,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if ((pos & 
(PAGE_CACHE_SIZE - 1))) { pinned[0] = grab_cache_page(inode->i_mapping, first_index); if (!PageUptodate(pinned[0])) { - ret = mpage_readpage(pinned[0], btrfs_get_block); + ret = btrfs_readpage(NULL, pinned[0]); BUG_ON(ret); wait_on_page_locked(pinned[0]); } else { @@ -513,7 +508,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { - ret = mpage_readpage(pinned[1], btrfs_get_block); + ret = btrfs_readpage(NULL, pinned[1]); BUG_ON(ret); wait_on_page_locked(pinned[1]); } else { @@ -633,138 +628,6 @@ out: return num_written ? num_written : err; } -/* - * FIXME, do this by stuffing the csum we want in the info hanging off - * page->private. For now, verify file csums on read - */ -static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long left, count = desc->count; - struct inode *inode = page->mapping->host; - - if (size > count) - size = count; - - if (!PageChecked(page)) { - /* FIXME, do it per block */ - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct buffer_head *bh; - - if (page_has_buffers(page)) { - bh = page_buffers(page); - if (!buffer_mapped(bh)) { - SetPageChecked(page); - goto checked; - } - } - - ret = btrfs_csum_verify_file_block(root, - page->mapping->host->i_ino, - page->index << PAGE_CACHE_SHIFT, - kmap(page), PAGE_CACHE_SIZE); - if (ret) { - if (ret != -ENOENT) { - printk("failed to verify ino %lu page %lu ret %d\n", - page->mapping->host->i_ino, - page->index, ret); - memset(page_address(page), 1, PAGE_CACHE_SIZE); - flush_dcache_page(page); - } - } - SetPageChecked(page); - kunmap(page); - } -checked: - /* - * Faults on the destination of a read are common, so do it before - * taking the kmap. - */ - if (!fault_in_pages_writeable(desc->arg.buf, size)) { - kaddr = kmap_atomic(page, KM_USER0); - left = __copy_to_user_inatomic(desc->arg.buf, - kaddr + offset, size); - kunmap_atomic(kaddr, KM_USER0); - if (left == 0) - goto success; - } - - /* Do it the slow way */ - kaddr = kmap(page); - left = __copy_to_user(desc->arg.buf, kaddr + offset, size); - kunmap(page); - - if (left) { - size -= left; - desc->error = -EFAULT; - } -success: - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; -} - -/** - * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - */ -static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *filp = iocb->ki_filp; - ssize_t retval; - unsigned long seg; - size_t count; - loff_t *ppos = &iocb->ki_pos; - - count = 0; - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. 
- */ - count += iv->iov_len; - if (unlikely((ssize_t)(count|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - nr_segs = seg; - count -= iv->iov_len; /* This segment is no good */ - break; - } - retval = 0; - if (count) { - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - - desc.written = 0; - desc.arg.buf = iov[seg].iov_base; - desc.count = iov[seg].iov_len; - if (desc.count == 0) - continue; - desc.error = 0; - do_generic_file_read(filp, ppos, &desc, - btrfs_read_actor); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - } - } - return retval; -} - static int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { @@ -789,12 +652,25 @@ out: return ret > 0 ? EIO : ret; } +static struct vm_operations_struct btrfs_file_vm_ops = { + .nopage = filemap_nopage, + .populate = filemap_populate, + .page_mkwrite = btrfs_page_mkwrite, +}; + +static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) +{ + vma->vm_ops = &btrfs_file_vm_ops; + file_accessed(filp); + return 0; +} + struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = btrfs_file_aio_read, + .aio_read = generic_file_aio_read, .write = btrfs_file_write, - .mmap = generic_file_mmap, + .mmap = btrfs_file_mmap, .open = generic_file_open, .ioctl = btrfs_ioctl, .fsync = btrfs_sync_file, -- cgit v1.2.2 From 8c2383c3dd2cb5bb39598ce4fa97154bc591020a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Jun 2007 09:57:58 -0400 Subject: Subject: Rework btrfs_file_write to only allocate while page locks are held Signed-off-by: Chris Mason --- fs/btrfs/file.c | 244 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 133 insertions(+), 111 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index de8d47b44e12..6b455c2b3f03 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -207,6 +207,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, } path->slots[0]--; } +next_slot: keep = 0; bookend = 0; found_extent = 0; @@ -214,39 +215,48 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, extent = NULL; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; + ret = 0; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); if (key.offset >= end || key.objectid != inode->i_ino) { - ret = 0; goto out; } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { - ret = 0; + if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { goto out; } - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); - found_extent = 1; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - found_inline = 1; - extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + slot); + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + + 
slot); + } + } else { + extent_end = search_start; } /* we found nothing we can drop */ - if (!found_extent && !found_inline) { - ret = 0; - goto out; - } - - /* we found nothing inside the range */ - if (search_start >= extent_end) { - ret = 0; - goto out; + if ((!found_extent && !found_inline) || + search_start >= extent_end) { + int nextret; + u32 nritems; + nritems = btrfs_header_nritems( + btrfs_buffer_header(path->nodes[0])); + if (slot >= nritems - 1) { + nextret = btrfs_next_leaf(root, path); + if (nextret) + goto out; + } else { + path->slots[0]++; + } + goto next_slot; } /* FIXME, there's only one inline extent allowed right now */ @@ -272,7 +282,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, WARN_ON(found_inline); bookend = 1; } - /* truncate existing extent */ if (start > key.offset) { u64 new_num; @@ -337,10 +346,14 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, ins.offset = end; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); - btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + + if (ret) { + btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end); + } BUG_ON(ret); extent = btrfs_item_ptr( btrfs_buffer_leaf(path->nodes[0]), @@ -387,8 +400,7 @@ static int prepare_pages(struct btrfs_root *root, loff_t pos, unsigned long first_index, unsigned long last_index, - size_t write_bytes, - u64 alloc_extent_start) + size_t write_bytes) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; @@ -399,6 +411,16 @@ static int prepare_pages(struct btrfs_root *root, struct buffer_head *bh; struct buffer_head *head; loff_t isize = i_size_read(inode); + struct btrfs_trans_handle *trans; + u64 hint_block; + u64 num_blocks; + u64 alloc_extent_start; + u64 start_pos; + struct btrfs_key ins; + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); @@ -408,6 +430,72 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, + start_pos, (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + BUG_ON(err); + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (isize + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(err); + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (isize >= PAGE_CACHE_SIZE 
|| pos + write_bytes < inode->i_size || + pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + err = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, hint_block, (u64)-1, + &ins, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(err); + } else { + ins.offset = 0; + ins.objectid = 0; + } + BUG_ON(err); + alloc_extent_start = ins.objectid; + err = btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + for (i = 0; i < num_pages; i++) { cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); @@ -444,6 +532,11 @@ failed_truncate: if (pos > isize) vmtruncate(inode, isize); return err; + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + goto failed_release; + } static ssize_t btrfs_file_write(struct file *file, const char __user *buf, @@ -455,16 +548,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[8]; + struct page **pages = NULL; + int nrptrs; struct page *pinned[2]; unsigned long first_index; unsigned long last_index; - u64 start_pos; - u64 num_blocks; - u64 alloc_extent_start; - u64 hint_block; - struct btrfs_trans_handle *trans; - struct btrfs_key ins; + + nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, + PAGE_CACHE_SIZE / (sizeof(struct page *))); pinned[0] = NULL; pinned[1] = NULL; if (file->f_flags & O_DIRECT) @@ -482,9 +573,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; file_update_time(file); - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; + pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; @@ -516,87 +605,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - - /* FIXME...EIEIO, ENOSPC and more */ - - /* step one, delete the existing extents in this range */ - if (start_pos < inode->i_size) { - /* FIXME blocksize != pagesize */ - ret = btrfs_drop_extents(trans, root, inode, - start_pos, - (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1), - &hint_block); - BUG_ON(ret); - } - - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (inode->i_size + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - ret = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); - } - BUG_ON(ret); - } - - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent - */ - if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || - pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, 
inode->i_ino, - num_blocks, hint_block, (u64)-1, - &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(ret); - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(ret); - alloc_extent_start = ins.objectid; - ret = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, - (size_t)PAGE_CACHE_SIZE - offset); + size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE - + offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, - write_bytes, alloc_extent_start); + write_bytes); BUG_ON(ret); - /* FIXME blocks != pagesize */ - if (alloc_extent_start) - alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); @@ -611,13 +633,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pos += write_bytes; num_written += write_bytes; - balance_dirty_pages_ratelimited(inode->i_mapping); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); btrfs_btree_balance_dirty(root); cond_resched(); } -out_unlock: mutex_unlock(&inode->i_mutex); out: + kfree(pages); if (pinned[0]) page_cache_release(pinned[0]); if (pinned[1]) -- cgit v1.2.2 From 11bd143fc8243cf48c934dc1c4479a5aacf58ce3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:24 -0400 Subject: Btrfs: Switch to libcrc32c to avoid problems with cryptomgr on highmem machines Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6b455c2b3f03..23f029359688 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -607,7 +607,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, nrptrs * PAGE_CACHE_SIZE - + size_t write_bytes = min(count, nrptrs * + (size_t)PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -- cgit v1.2.2 From 54aa1f4dfdacd60a19c4471220b24e581be6f774 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: Btrfs: Audit callers and return codes to make sure -ENOSPC gets up the stack Signed-off-by: Chris Mason --- fs/btrfs/file.c | 131 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 50 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 23f029359688..fef7ba1e707f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -81,6 +81,62 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } +static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, + u64 offset, ssize_t size, + struct buffer_head *bh) +{ + struct btrfs_key key; + struct btrfs_path *path; + char *ptr, *kaddr; + struct btrfs_trans_handle *trans; + struct btrfs_file_extent_item *ei; + u32 datasize; + int err = 0; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + key.objectid = 
inode->i_ino; + key.offset = offset; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(size >= PAGE_CACHE_SIZE); + datasize = btrfs_file_extent_calc_inline_size(size); + + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + if (ret) { + err = ret; + goto fail; + } + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, kaddr + bh_offset(bh), + size); + kunmap_atomic(kaddr, KM_USER0); + mark_buffer_dirty(path->nodes[0]); +fail: + btrfs_free_path(path); + ret = btrfs_end_transaction(trans, root); + if (ret && !err) + err = ret; + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -96,57 +152,22 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int this_write; struct inode *inode = file->f_path.dentry->d_inode; struct buffer_head *bh; - struct btrfs_file_extent_item *ei; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); - /* FIXME, one block at a time */ + /* FIXME, one block at a time */ bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { - struct btrfs_key key; - struct btrfs_path *path; - char *ptr, *kaddr; - u32 datasize; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - /* create an inline extent, and copy the data in */ - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = pages[i]->index << PAGE_CACHE_SHIFT; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(write_bytes >= PAGE_CACHE_SIZE); - datasize = offset + - btrfs_file_extent_calc_inline_size(write_bytes); - - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - - kaddr = kmap_atomic(bh->b_page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + bh_offset(bh), - offset + write_bytes); - kunmap_atomic(kaddr, KM_USER0); - - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); + ret = insert_inline_extent(root, inode, + pages[i]->index << PAGE_CACHE_SHIFT, + offset + this_write, bh); + if (ret) { + err = ret; + goto failed; + } } ret = btrfs_commit_write(file, pages[i], offset, @@ -321,6 +342,7 @@ next_slot: btrfs_file_extent_disk_blocknr(extent); } ret = btrfs_del_item(trans, root, path); + /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; @@ -452,7 +474,8 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->blocksize -1) & ~((u64)root->blocksize - 1), &hint_block); - BUG_ON(err); + if (err) + goto failed_release; } /* insert 
any holes we need to create */ @@ -469,7 +492,8 @@ static int prepare_pages(struct btrfs_root *root, last_pos_in_file, 0, 0, hole_size); } - BUG_ON(err); + if (err) + goto failed_release; } /* @@ -481,11 +505,13 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, hint_block, (u64)-1, &ins, 1); - BUG_ON(err); + if (err) + goto failed_truncate; err = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset, ins.offset); - BUG_ON(err); + if (err) + goto failed_truncate; } else { ins.offset = 0; ins.objectid = 0; @@ -618,16 +644,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); - BUG_ON(ret); + if (ret) { + btrfs_drop_pages(pages, num_pages); + goto out; + } ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); - BUG_ON(ret); btrfs_drop_pages(pages, num_pages); + if (ret) + goto out; buf += write_bytes; count -= write_bytes; -- cgit v1.2.2 From ccd467d60e81b48cdbecae93532b66bcdedca91d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Jun 2007 15:57:36 -0400 Subject: Btrfs: crash recovery fixes Signed-off-by: Chris Mason --- fs/btrfs/file.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fef7ba1e707f..2456cc3e1cfd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -127,7 +127,7 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, ptr, kaddr + bh_offset(bh), size); kunmap_atomic(kaddr, KM_USER0); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_free_path(path); ret = btrfs_end_transaction(trans, root); @@ -211,11 +211,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int found_type; int found_extent; int found_inline; + int recow; path = btrfs_alloc_path(); if (!path) return -ENOMEM; while(1) { + recow = 0; btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, search_start, -1); @@ -244,6 +246,10 @@ next_slot: if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { goto out; } + if (recow) { + search_start = key.offset; + continue; + } if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); @@ -274,6 +280,7 @@ next_slot: nextret = btrfs_next_leaf(root, path); if (nextret) goto out; + recow = 1; } else { path->slots[0]++; } @@ -321,7 +328,7 @@ next_slot: } btrfs_set_file_extent_num_blocks(extent, new_num); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { WARN_ON(1); } @@ -452,6 +459,8 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); } mutex_lock(&root->fs_info->fs_mutex); @@ -522,8 +531,6 @@ static int prepare_pages(struct btrfs_root *root, mutex_unlock(&root->fs_info->fs_mutex); for (i = 0; i < num_pages; i++) { - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); - wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); if (!page_has_buffers(pages[i])) { -- cgit v1.2.2 From 
ec6b910fb330f29e169c9f502c3ac209515af8d1 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Wed, 11 Jul 2007 10:00:37 -0400 Subject: Btrfs: trivial include fixups Almost none of the files including module.h need to do so, remove them. Include sched.h in extent-tree.c to silence a warning about cond_resched() being undeclared. Signed-off-by: Zach Brown Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2456cc3e1cfd..f327525f541d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include -- cgit v1.2.2 From 92fee66d49b812ea57eae53c5baaf8942ac9af06 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jul 2007 12:31:35 -0400 Subject: Btrfs: deal with api changes in 2.6.23-rc1 Signed-off-by: Chris Mason --- fs/btrfs/file.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f327525f541d..1fe38fe84150 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -713,8 +714,12 @@ out: } static struct vm_operations_struct btrfs_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + .nopage = filemap_nopage, + .populate = filemap_populate, +#else + .fault = filemap_fault, +#endif .page_mkwrite = btrfs_page_mkwrite, }; -- cgit v1.2.2 From 6702ed490ca0bb44e17131818a5a18b773957c5a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 16:15:09 -0400 Subject: Btrfs: Add run time btree defrag, and an ioctl to force btree defrag This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. 
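Why defragmentation helps sequential reads can be sketched in plain C, independent of the btrfs structures (every name below is illustrative, not the kernel implementation): a btree reads fastest when blocks that are adjacent in key order are also adjacent on disk, so a defragmenter walks the blocks in key order and treats any block whose physical location does not follow its predecessor as a relocation candidate.

    /*
     * Illustrative sketch only -- not the btrfs implementation.  A btree
     * block is a candidate for relocation when its physical block number
     * does not immediately follow the previous block in key order.
     */
    #include <stdio.h>
    #include <stdint.h>

    struct tree_block {
        uint64_t key;       /* first key stored in the block (logical order) */
        uint64_t blocknr;   /* physical block number on disk */
    };

    /* Count blocks that would benefit from relocation. */
    static size_t count_defrag_candidates(const struct tree_block *blocks,
                                          size_t nr)
    {
        size_t candidates = 0;
        for (size_t i = 1; i < nr; i++) {
            /* ideal layout: physically contiguous in key order */
            if (blocks[i].blocknr != blocks[i - 1].blocknr + 1)
                candidates++;
        }
        return candidates;
    }

    int main(void)
    {
        struct tree_block blocks[] = {
            { 1, 100 }, { 2, 101 }, { 3, 507 }, { 4, 508 }, { 5, 102 },
        };
        printf("%zu of %zu blocks are defrag candidates\n",
               count_defrag_candidates(blocks, 5), (size_t)5);
        return 0;
    }

A real defragmenter would then reallocate such candidates near their neighbours, ideally while the blocks are still dirty in RAM, which is the run time form described above; the forced ioctl form covers blocks that have already gone to disk.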
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1fe38fe84150..00b118a2db69 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root, if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, + num_blocks, 0, hint_block, (u64)-1, &ins, 1); if (err) goto failed_truncate; -- cgit v1.2.2 From 15ee9bc7ed171248d1405df5854da5fa91bfdc39 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 10 Aug 2007 16:22:09 -0400 Subject: Btrfs: delay commits during fsync to allow more writers Signed-off-by: Chris Mason --- fs/btrfs/file.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 00b118a2db69..6933ab11a5cd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -694,22 +694,36 @@ static int btrfs_sync_file(struct file *file, { struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; + int ret = 0; struct btrfs_trans_handle *trans; /* - * FIXME, use inode generation number to check if we can skip the - * commit + * check the transaction that last modified this inode + * and see if its already been committed */ mutex_lock(&root->fs_info->fs_mutex); + if (!BTRFS_I(inode)->last_trans) + goto out; + mutex_lock(&root->fs_info->trans_mutex); + if (BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed) { + BTRFS_I(inode)->last_trans = 0; + mutex_unlock(&root->fs_info->trans_mutex); + goto out; + } + mutex_unlock(&root->fs_info->trans_mutex); + + /* + * ok we haven't committed the transaction yet, lets do a commit + */ trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; goto out; } ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); out: + mutex_unlock(&root->fs_info->fs_mutex); return ret > 0 ? EIO : ret; } -- cgit v1.2.2 From a52d9a8033c454cd9b4697cfafb467509fc1693f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Extent based page cache code. This uses an rbtree of extents and tests instead of buffer heads. 
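The shape of that structure can be sketched in ordinary userspace C, with a plain binary search tree standing in for the kernel rbtree (the names below are illustrative and are not the btrfs extent_map API): each inode keeps a tree of extents keyed by starting file offset, and mapping an offset means finding the node whose [start, end] range contains it.

    /*
     * Illustrative sketch only -- a userspace model of an extent map keyed
     * by file offset, standing in for the kernel rbtree-based extent map.
     * Each extent records a byte range [start, end] and where it lives.
     */
    #include <stdio.h>
    #include <stdint.h>

    struct extent {
        uint64_t start;        /* first byte covered, inclusive */
        uint64_t end;          /* last byte covered, inclusive */
        uint64_t block_start;  /* disk location (or a hole/inline marker) */
        struct extent *left, *right;  /* simple BST; the kernel uses an rbtree */
    };

    static struct extent *insert_extent(struct extent *root, struct extent *em)
    {
        if (!root)
            return em;
        if (em->start < root->start)
            root->left = insert_extent(root->left, em);
        else
            root->right = insert_extent(root->right, em);
        return root;
    }

    /* Find the extent covering a given file offset, if any. */
    static struct extent *lookup_extent(struct extent *root, uint64_t offset)
    {
        while (root) {
            if (offset < root->start)
                root = root->left;
            else if (offset > root->end)
                root = root->right;
            else
                return root;
        }
        return NULL;
    }

    int main(void)
    {
        struct extent a = { 0, 4095, 1000, NULL, NULL };
        struct extent b = { 4096, 16383, 2000, NULL, NULL };
        struct extent *root = NULL;

        root = insert_extent(root, &a);
        root = insert_extent(root, &b);

        struct extent *em = lookup_extent(root, 8192);
        if (em)
            printf("offset 8192 -> extent [%llu, %llu] at block %llu\n",
                   (unsigned long long)em->start,
                   (unsigned long long)em->end,
                   (unsigned long long)em->block_start);
        return 0;
    }

Per-range state such as dirty or uptodate is then tracked by offset range rather than per buffer_head, which is what lets the hunks below drop the buffer head handling from file.c.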
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 295 ++++++++++++++++++++++++++------------------------------ 1 file changed, 138 insertions(+), 157 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6933ab11a5cd..71a481894ab6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -81,14 +81,14 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } -static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, +static int insert_inline_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, u64 offset, ssize_t size, - struct buffer_head *bh) + struct page *page, size_t page_offset) { struct btrfs_key key; struct btrfs_path *path; char *ptr, *kaddr; - struct btrfs_trans_handle *trans; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -98,8 +98,6 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, if (!path) return -ENOMEM; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); key.objectid = inode->i_ino; @@ -122,18 +120,13 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER0); btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + bh_offset(bh), - size); + ptr, kaddr + page_offset, size); kunmap_atomic(kaddr, KM_USER0); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - if (ret && !err) - err = ret; - mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -145,45 +138,143 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, loff_t pos, size_t write_bytes) { - int i; - int offset; int err = 0; - int ret; - int this_write; + int i; struct inode *inode = file->f_path.dentry->d_inode; - struct buffer_head *bh; + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_key ins; + u64 hint_block; + u64 num_blocks; + u64 start_pos; + u64 end_of_last_block; + u64 end_pos = pos + write_bytes; + loff_t isize = i_size_read(inode); - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; - /* FIXME, one block at a time */ - bh = page_buffers(pages[i]); + em->bdev = inode->i_sb->s_bdev; - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - ret = insert_inline_extent(root, inode, - pages[i]->index << PAGE_CACHE_SHIFT, - offset + this_write, bh); - if (ret) { - err = ret; - goto failed; - } - } + start_pos = pos & ~((u64)root->blocksize - 1); + num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; - ret = btrfs_commit_write(file, pages[i], offset, - offset + this_write); - pos += this_write; - if (ret) { - err = ret; + end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + if ((end_of_last_block & 4095) == 0) { + printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block); + } + set_extent_uptodate(em_tree, 
start_pos, end_of_last_block, GFP_NOFS); + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, + start_pos, (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (isize + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + if (err) goto failed; + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || + pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + err = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, 0, hint_block, (u64)-1, + &ins, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(err); + em->start = start_pos; + em->end = end_of_last_block; + em->block_start = ins.objectid << inode->i_blkbits; + em->block_end = em->block_start + + (ins.offset << inode->i_blkbits) - 1; + set_extent_dirty(em_tree, start_pos, end_of_last_block, + GFP_NOFS); + err = add_extent_mapping(em_tree, em); + for (i = 0; i < num_pages; i++) { + struct page *p = pages[i]; + SetPageUptodate(p); + __set_page_dirty_nobuffers(p); } - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; + } else { + struct page *p = pages[0]; + err = insert_inline_extent(trans, root, inode, start_pos, + end_pos - start_pos, p, 0); + BUG_ON(err); + em->start = start_pos; + em->end = end_pos; + em->block_start = EXTENT_MAP_INLINE; + em->block_end = EXTENT_MAP_INLINE; + add_extent_mapping(em_tree, em); + } + if (end_pos > isize) { + i_size_write(inode, end_pos); + btrfs_update_inode(trans, root, inode); } failed: + err = btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + free_extent_map(em); return err; } +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) +{ + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + + while(1) { + em = lookup_extent_mapping(em_tree, start, end); + if (!em) + break; + remove_extent_mapping(em_tree, em); + /* once for us */ + free_extent_map(em); + /* once for the tree*/ + free_extent_map(em); + } + return 0; +} + /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. 
hint_block is filled in with a block number @@ -213,6 +304,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int found_inline; int recow; + btrfs_drop_extent_cache(inode, start, end - 1); + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -434,18 +527,9 @@ static int prepare_pages(struct btrfs_root *root, int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; - int offset; int err = 0; - int this_write; - struct buffer_head *bh; - struct buffer_head *head; - loff_t isize = i_size_read(inode); - struct btrfs_trans_handle *trans; - u64 hint_block; u64 num_blocks; - u64 alloc_extent_start; u64 start_pos; - struct btrfs_key ins; start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> @@ -457,119 +541,17 @@ static int prepare_pages(struct btrfs_root *root, pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; - goto failed_release; + BUG_ON(1); } cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); - } - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - - /* FIXME...EIEIO, ENOSPC and more */ - - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed_release; - } - - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (isize + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - err = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); - } - if (err) - goto failed_release; - } - - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent - */ - if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - if (err) - goto failed_truncate; - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - if (err) - goto failed_truncate; - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(err); - alloc_extent_start = ins.objectid; - err = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); - if (!page_has_buffers(pages[i])) { - create_empty_buffers(pages[i], - root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); + if (!PagePrivate(pages[i])) { + SetPagePrivate(pages[i]); + set_page_private(pages[i], 1); + page_cache_get(pages[i]); } - head = page_buffers(pages[i]); - bh = head; - do { - err = btrfs_map_bh_to_logical(root, bh, - alloc_extent_start); - BUG_ON(err); - if (err) - goto 
failed_truncate; - bh = bh->b_this_page; - if (alloc_extent_start) - alloc_extent_start++; - } while (bh != head); - pos += this_write; - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; } return 0; - -failed_release: - btrfs_drop_pages(pages, num_pages); - return err; - -failed_truncate: - btrfs_drop_pages(pages, num_pages); - if (pos > isize) - vmtruncate(inode, isize); - return err; - -out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); - goto failed_release; - } static ssize_t btrfs_file_write(struct file *file, const char __user *buf, @@ -685,7 +667,6 @@ out: page_cache_release(pinned[1]); *ppos = pos; current->backing_dev_info = NULL; - mark_inode_dirty(inode); return num_written ? num_written : err; } @@ -714,8 +695,8 @@ static int btrfs_sync_file(struct file *file, mutex_unlock(&root->fs_info->trans_mutex); /* - * ok we haven't committed the transaction yet, lets do a commit - */ + * ok we haven't committed the transaction yet, lets do a commit + */ trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; -- cgit v1.2.2 From b888db2bd7b67f190b32934e6a86181f262ac3ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Add delayed allocation to the extent based page tree code Signed-off-by: Chris Mason --- fs/btrfs/file.c | 50 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71a481894ab6..d3d39e4a2797 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -143,7 +143,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct btrfs_key ins; u64 hint_block; u64 num_blocks; u64 start_pos; @@ -162,6 +161,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_blkbits; end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -179,16 +179,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* FIXME...EIEIO, ENOSPC and more */ - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } - /* insert any holes we need to create */ if (inode->i_size < start_pos) { u64 last_pos_in_file; @@ -213,29 +203,28 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - BUG_ON(err); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(err); - em->start = start_pos; - em->end = end_of_last_block; - em->block_start = ins.objectid << inode->i_blkbits; - em->block_end = em->block_start + - (ins.offset << inode->i_blkbits) - 1; - set_extent_dirty(em_tree, start_pos, end_of_last_block, - GFP_NOFS); - err = add_extent_mapping(em_tree, em); + u64 last_end; 
for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); - __set_page_dirty_nobuffers(p); + set_page_dirty(p); } + last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + last_end += PAGE_CACHE_SIZE - 1; + set_extent_delalloc(em_tree, start_pos, end_of_last_block, + GFP_NOFS); } else { struct page *p = pages[0]; + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; + } + err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); BUG_ON(err); @@ -253,6 +242,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); + unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); return err; } @@ -531,7 +521,7 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + start_pos = pos & ~((u64)root->blocksize - 1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> inode->i_blkbits; @@ -548,8 +538,10 @@ static int prepare_pages(struct btrfs_root *root, if (!PagePrivate(pages[i])) { SetPagePrivate(pages[i]); set_page_private(pages[i], 1); + WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); page_cache_get(pages[i]); } + WARN_ON(!PageLocked(pages[i])); } return 0; } -- cgit v1.2.2 From 2bf5a725a3b82efeaf7b292c085e69a9388a89ea Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Aug 2007 11:54:02 -0400 Subject: Btrfs: fsx delalloc fixes Signed-off-by: Chris Mason --- fs/btrfs/file.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3d39e4a2797..07b121d4bd93 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -186,8 +186,16 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->blocksize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_drop_extents(trans, root, inode, + last_pos_in_file, + last_pos_in_file + hole_size, + &hint_block); + if (err) + goto failed; + + hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -217,13 +225,11 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct page *p = pages[0]; /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); @@ -400,6 +406,8 @@ next_slot: keep = 1; WARN_ON(start & (root->blocksize - 1)); if (found_extent) { + btrfs_drop_extent_cache(inode, key.offset, + start - 1 ); new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); @@ -464,7 +472,7 @@ next_slot: if (ret) { 
btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); extent = btrfs_item_ptr( -- cgit v1.2.2 From 8e21528f87854314792aaef4d279bc9e5a9be997 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 30 Aug 2007 12:16:51 -0400 Subject: Btrfs: remove extra drop_extent_cache call Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 07b121d4bd93..acef8cb7fb79 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -406,8 +406,6 @@ next_slot: keep = 1; WARN_ON(start & (root->blocksize - 1)); if (found_extent) { - btrfs_drop_extent_cache(inode, key.offset, - start - 1 ); new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); -- cgit v1.2.2 From 011410bd859a481a335d7db1fb559542c5663fd5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Sep 2007 19:58:36 -0400 Subject: Btrfs: Add more synchronization before creating a snapshot File data checksums are only done during writepage, so we have to make sure all pages are written when the snapshot is taken. This also adds some locking so that new writes don't race in and add new dirty pages. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index acef8cb7fb79..77f1950b0aa0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -160,6 +160,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> inode->i_blkbits; + down_read(&BTRFS_I(inode)->root->snap_sem); end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); @@ -250,6 +251,7 @@ out_unlock: mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); + up_read(&BTRFS_I(inode)->root->snap_sem); return err; } -- cgit v1.2.2 From 9433063bcb4b3c0c9e5ef3c24a811b63084dedb4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Sep 2007 20:02:22 -0400 Subject: [PATCH] btrfs: fix printk format warning Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 77f1950b0aa0..c26ca54ed4de 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -174,7 +174,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, hint_block = 0; if ((end_of_last_block & 4095) == 0) { - printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block); + printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); -- cgit v1.2.2 From 34287aa3605d52ef5a4b6e5e2cca98667166508b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Sep 2007 10:22:47 -0400 Subject: Btrfs: use unlocked_ioctl No reason to grab the BKL before calling into the btrfs ioctl code. 
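For reference, a declaration-only sketch of the two hook shapes as best recalled for this kernel era; the struct declarations are stubs so the snippet stands alone, and this is not the btrfs code itself:

/*
 * The legacy ->ioctl hook was invoked with the Big Kernel Lock held;
 * ->unlocked_ioctl is invoked without it, so a filesystem that does
 * its own locking (or needs none) avoids the global serialization.
 */
struct file;	/* stub standing in for the kernel's struct file */
struct inode;	/* stub standing in for the kernel's struct inode */

/* legacy hook: BKL taken by the VFS around the call */
typedef int (*old_ioctl_t)(struct inode *inode, struct file *file,
			   unsigned int cmd, unsigned long arg);

/* replacement hook: no BKL, callee does whatever locking it needs */
typedef long (*unlocked_ioctl_t)(struct file *file,
				 unsigned int cmd, unsigned long arg);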
Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c26ca54ed4de..ca7e5d4474e0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -732,10 +732,10 @@ struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, - .ioctl = btrfs_ioctl, .fsync = btrfs_sync_file, + .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_compat_ioctl, + .compat_ioctl = btrfs_ioctl, #endif }; -- cgit v1.2.2 From 6af858b24c0c4e91ce0b40b5dd5fdfc1fe2dda70 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 14 Sep 2007 10:23:29 -0400 Subject: Btrfs: Fix off by one error in dirty_and_release_pages Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ca7e5d4474e0..26826a543355 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -236,7 +236,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, end_pos - start_pos, p, 0); BUG_ON(err); em->start = start_pos; - em->end = end_pos; + em->end = end_pos - 1; em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; add_extent_mapping(em_tree, em); -- cgit v1.2.2 From d3c2fdcf7b79079f60ac64e61d886964d4647910 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Sep 2007 10:58:06 -0400 Subject: Btrfs: Use balance_dirty_pages_nr on btree blocks btrfs_btree_balance_dirty is changed to pass the number of pages dirtied for more accurate dirty throttling. This lets the VM make better decisions about when to force some writeback. 
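The idea, sketched here in plain userspace C with invented names and an arbitrary threshold, is simply that the writer reports how many pages it actually dirtied instead of letting the throttle guess:

/*
 * Toy throttling loop: accumulate the caller-supplied count of dirtied
 * pages and "flush" once a threshold is crossed.  Only the pass-the-
 * real-count shape mirrors the patch; everything else is made up.
 */
#include <stdio.h>

#define THROTTLE_THRESHOLD 32	/* arbitrary for the example */

static unsigned long pending;	/* pages dirtied since the last flush */

static void balance_dirty(unsigned long nr_dirtied)
{
	pending += nr_dirtied;
	if (pending >= THROTTLE_THRESHOLD) {
		printf("flushing after %lu dirtied pages\n", pending);
		pending = 0;	/* pretend writeback ran */
	}
}

int main(void)
{
	int i;

	/* a write loop dirtying a few pages per iteration */
	for (i = 0; i < 10; i++)
		balance_dirty(7);
	return 0;
}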
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 26826a543355..698eaea612f1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -655,7 +655,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - btrfs_btree_balance_dirty(root); + btrfs_btree_balance_dirty(root, 1); cond_resched(); } mutex_unlock(&inode->i_mutex); -- cgit v1.2.2 From b3cfa35a493866cffd00952438b6c77364d42d3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Sep 2007 11:25:58 -0400 Subject: Btrfs: factor page private preparations into a helper Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 698eaea612f1..4cc459c943ec 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -543,12 +543,7 @@ static int prepare_pages(struct btrfs_root *root, } cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); - if (!PagePrivate(pages[i])) { - SetPagePrivate(pages[i]); - set_page_private(pages[i], 1); - WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); - page_cache_get(pages[i]); - } + set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } return 0; -- cgit v1.2.2 From 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:19 -0400 Subject: Btrfs: Create extent_buffer interface for large blocksizes Signed-off-by: Chris Mason --- fs/btrfs/file.c | 129 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 67 insertions(+), 62 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4cc459c943ec..1734ca695555 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. 
*/ -#include #include #include #include @@ -88,7 +87,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, { struct btrfs_key key; struct btrfs_path *path; - char *ptr, *kaddr; + struct extent_buffer *leaf; + char *kaddr; + unsigned long ptr; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -102,7 +103,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); BUG_ON(size >= PAGE_CACHE_SIZE); datasize = btrfs_file_extent_calc_inline_size(size); @@ -113,18 +113,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, err = ret; goto fail; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); kaddr = kmap_atomic(page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, size); kunmap_atomic(kaddr, KM_USER0); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); return err; @@ -156,8 +155,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; down_read(&BTRFS_I(inode)->root->snap_sem); @@ -184,7 +183,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (inode->i_size < start_pos) { u64 last_pos_in_file; u64 hole_size; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; @@ -227,8 +226,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); + (pos + write_bytes + root->sectorsize -1) & + ~((u64)root->sectorsize - 1), &hint_block); if (err) goto failed; @@ -288,7 +287,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; struct btrfs_file_extent_item *extent; u64 extent_end = 0; @@ -327,10 +326,10 @@ next_slot: found_extent = 0; found_inline = 0; extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; ret = 0; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } @@ -344,17 +343,18 @@ next_slot: if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); + found_type = 
btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << + (btrfs_file_extent_num_blocks(leaf, extent) << inode->i_blkbits); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + - slot); + btrfs_file_extent_inline_len(leaf, item); } } else { extent_end = search_start; @@ -365,8 +365,7 @@ next_slot: search_start >= extent_end) { int nextret; u32 nritems; - nritems = btrfs_header_nritems( - btrfs_buffer_header(path->nodes[0])); + nritems = btrfs_header_nritems(leaf); if (slot >= nritems - 1) { nextret = btrfs_next_leaf(root, path); if (nextret) @@ -380,7 +379,7 @@ next_slot: /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; @@ -388,10 +387,13 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf,extent); u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); + btrfs_file_extent_disk_num_blocks(leaf, + extent); + read_extent_buffer(leaf, &old, + (unsigned long)extent, + sizeof(old)); if (disk_blocknr != 0) { ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, disk_num_blocks); @@ -406,20 +408,24 @@ next_slot: u64 new_num; u64 old_num; keep = 1; - WARN_ON(start & (root->blocksize - 1)); + WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); + old_num = btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { + btrfs_file_extent_disk_blocknr(leaf, + extent); + if (btrfs_file_extent_disk_blocknr(leaf, + extent)) { inode->i_blocks -= (old_num - new_num) << 3; } - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, + extent, new_num); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); } @@ -431,13 +437,17 @@ next_slot: u64 extent_num_blocks = 0; if (found_extent) { disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + btrfs_file_extent_disk_num_blocks(leaf, + extent); extent_num_blocks = - btrfs_file_extent_num_blocks(extent); + btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ @@ -464,42 +474,37 @@ next_slot: struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + leaf = path->nodes[0]; if (ret) { - btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); + 
btrfs_print_leaf(root, leaf); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); - - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + write_extent_buffer(leaf, &old, + (unsigned long)extent, sizeof(old)); + + btrfs_set_file_extent_offset(leaf, extent, + le64_to_cpu(old.offset) + ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(btrfs_file_extent_num_blocks(&old) < + WARN_ON(le64_to_cpu(old.num_blocks) < (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, extent, (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_type(extent, + btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { + if (le64_to_cpu(old.disk_blocknr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + btrfs_file_extent_num_blocks(leaf, + extent) << 3; } ret = 0; goto out; @@ -529,8 +534,8 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); -- cgit v1.2.2 From db94535db75e67fab12ccbb7f5ee548e33fed891 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:53 -0400 Subject: Btrfs: Allow tree blocks larger than the page size Signed-off-by: Chris Mason --- fs/btrfs/file.c | 126 +++++++++++++++++++++++++++----------------------------- 1 file changed, 60 insertions(+), 66 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1734ca695555..844d8807e44a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -120,9 +120,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -142,11 +142,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 hint_block; - u64 num_blocks; + u64 hint_byte; + u64 num_bytes; u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; + u32 inline_size; loff_t isize = i_size_read(inode); em = alloc_extent_map(GFP_NOFS); @@ -156,11 +157,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = 
inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; + num_bytes = (write_bytes + pos - start_pos + + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); down_read(&BTRFS_I(inode)->root->snap_sem); - end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + end_of_last_block = start_pos + num_bytes - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -169,8 +171,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, goto out_unlock; } btrfs_set_trans_block_group(trans, inode); - inode->i_blocks += num_blocks << 3; - hint_block = 0; + inode->i_blocks += num_bytes >> 9; + hint_byte = 0; if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); @@ -191,11 +193,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, - &hint_block); + &hint_byte); if (err) goto failed; - hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -209,8 +210,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ + inline_size = end_pos - start_pos; if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size >= PAGE_CACHE_SIZE) { u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -224,10 +227,9 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, } else { struct page *p = pages[0]; /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->sectorsize -1) & - ~((u64)root->sectorsize - 1), &hint_block); + ~((u64)root->sectorsize - 1), &hint_byte); if (err) goto failed; @@ -283,7 +285,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_block) + u64 start, u64 end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -346,8 +348,7 @@ next_slot: found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(leaf, extent) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { struct btrfs_item *item; @@ -386,17 +387,17 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { - u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf,extent); - u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, + u64 disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); + u64 disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); read_extent_buffer(leaf, &old, (unsigned long)extent, sizeof(old)); - if (disk_blocknr != 0) { + if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_blocknr, disk_num_blocks); + disk_bytenr, disk_num_bytes); 
BUG_ON(ret); } } @@ -410,21 +411,19 @@ next_slot: keep = 1; WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { - new_num = (start - key.offset) >> - inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, - extent); - if (btrfs_file_extent_disk_blocknr(leaf, - extent)) { + new_num = start - key.offset; + old_num = btrfs_file_extent_num_bytes(leaf, + extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (btrfs_file_extent_disk_bytenr(leaf, + extent)) { inode->i_blocks -= - (old_num - new_num) << 3; + (old_num - new_num) >> 9; } - btrfs_set_file_extent_num_blocks(leaf, - extent, - new_num); + btrfs_set_file_extent_num_bytes(leaf, extent, + new_num); btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); @@ -432,33 +431,32 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_blocknr = 0; - u64 disk_num_blocks = 0; - u64 extent_num_blocks = 0; + u64 disk_bytenr = 0; + u64 disk_num_bytes = 0; + u64 extent_num_bytes = 0; if (found_extent) { - disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf, + disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - extent); - extent_num_blocks = - btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, + disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); + extent_num_bytes = + btrfs_file_extent_num_bytes(leaf, extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_blocknr != 0) { - inode->i_blocks -= extent_num_blocks << 3; + if (found_extent && disk_bytenr != 0) { + inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_blocknr, - disk_num_blocks, 0); + disk_bytenr, + disk_num_bytes, 0); } BUG_ON(ret); @@ -491,20 +489,19 @@ next_slot: (unsigned long)extent, sizeof(old)); btrfs_set_file_extent_offset(leaf, extent, - le64_to_cpu(old.offset) + - ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(le64_to_cpu(old.num_blocks) < - (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(leaf, extent, - (extent_end - end) >> inode->i_blkbits); - + le64_to_cpu(old.offset) + end - key.offset); + WARN_ON(le64_to_cpu(old.num_bytes) < + (extent_end - end)); + btrfs_set_file_extent_num_bytes(leaf, extent, + extent_end - end); btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_blocknr) != 0) { + if (le64_to_cpu(old.disk_bytenr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(leaf, - extent) << 3; + btrfs_file_extent_num_bytes(leaf, + extent) >> 9; } ret = 0; goto out; @@ -531,12 +528,9 @@ static int prepare_pages(struct btrfs_root *root, unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; int err = 0; - u64 num_blocks; u64 start_pos; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); -- cgit v1.2.2 From 3326d1b07c0cb6a2ff5b835b7a2cffa54124d074 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:25 -0400 Subject: Btrfs: Allow tails larger than one page Signed-off-by: Chris Mason --- 
fs/btrfs/file.c | 152 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 116 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 844d8807e44a..1af2b6534dad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -82,8 +82,9 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) static int insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 offset, ssize_t size, - struct page *page, size_t page_offset) + u64 offset, size_t size, + struct page **pages, size_t page_offset, + int num_pages) { struct btrfs_key key; struct btrfs_path *path; @@ -91,9 +92,12 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, char *kaddr; unsigned long ptr; struct btrfs_file_extent_item *ei; + struct page *page; u32 datasize; int err = 0; int ret; + int i; + ssize_t cur_size; path = btrfs_alloc_path(); if (!path) @@ -104,25 +108,97 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(size >= PAGE_CACHE_SIZE); - datasize = btrfs_file_extent_calc_inline_size(size); + datasize = btrfs_file_extent_calc_inline_size(offset + size); - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - if (ret) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { err = ret; goto fail; } - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, ei, trans->transid); - btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - - kaddr = kmap_atomic(page, KM_USER1); - write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER1); + if (ret == 1) { + path->slots[0]--; + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) != + BTRFS_FILE_EXTENT_INLINE) { + goto insert; + } + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + ret = 0; + } + if (ret == 0) { + u32 found_size; + u64 found_start; + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) != + BTRFS_FILE_EXTENT_INLINE) { + err = ret; + btrfs_print_leaf(root, leaf); + printk("found wasn't inline offset %Lu inode %lu\n", + offset, inode->i_ino); + goto fail; + } + found_start = key.offset; + found_size = btrfs_file_extent_inline_len(leaf, + btrfs_item_nr(leaf, path->slots[0])); + + if (found_size < offset + size) { + btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, &key, path, + offset + size - found_size - + found_start, 1); + BUG_ON(ret != 0); + ret = btrfs_extend_item(trans, root, path, + offset + size - found_size - + found_start); + if (ret) { + err = ret; + goto fail; + } + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + } + } else { +insert: + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + if (ret) { + err = ret; + printk("got bad ret %d\n", ret); + goto fail; + } + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, 
BTRFS_FILE_EXTENT_INLINE); + } + ptr = btrfs_file_extent_inline_start(ei) + offset; + + cur_size = size; + i = 0; + while (size > 0) { + page = pages[i]; + kaddr = kmap_atomic(page, KM_USER0); + cur_size = min(PAGE_CACHE_SIZE - page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); + kunmap_atomic(kaddr, KM_USER0); + page_offset = 0; + ptr += cur_size; + size -= cur_size; + if (i >= num_pages) { + printk("i %d num_pages %d\n", i, num_pages); + } + i++; + } btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -193,6 +269,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, + last_pos_in_file, &hint_byte); if (err) goto failed; @@ -210,11 +287,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ - inline_size = end_pos - start_pos; - if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size >= PAGE_CACHE_SIZE) { + inline_size = end_pos; + if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size > 16384 || + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; + for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); @@ -225,22 +303,18 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); } else { - struct page *p = pages[0]; + u64 aligned_end; /* step one, delete the existing extents in this range */ + aligned_end = (pos + write_bytes + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->sectorsize -1) & - ~((u64)root->sectorsize - 1), &hint_byte); + aligned_end, end_pos, &hint_byte); if (err) goto failed; - err = insert_inline_extent(trans, root, inode, start_pos, - end_pos - start_pos, p, 0); + end_pos - start_pos, pages, 0, + num_pages); BUG_ON(err); - em->start = start_pos; - em->end = end_pos - 1; - em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; - add_extent_mapping(em_tree, em); } if (end_pos > isize) { i_size_write(inode, end_pos); @@ -285,7 +359,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_byte) + u64 start, u64 end, u64 inline_end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -401,8 +475,8 @@ next_slot: BUG_ON(ret); } } - WARN_ON(found_inline); - bookend = 1; + if (!found_inline) + bookend = 1; } /* truncate existing extent */ if (start > key.offset) { @@ -425,8 +499,14 @@ next_slot: btrfs_set_file_extent_num_bytes(leaf, extent, new_num); btrfs_mark_buffer_dirty(leaf); - } else { - WARN_ON(1); + } else if (end > extent_end && + key.offset < inline_end && + inline_end < extent_end) { + u32 new_size; + new_size = btrfs_file_extent_calc_inline_size( + inline_end - key.offset); + btrfs_truncate_item(trans, root, path, + new_size); } } /* delete the entire extent */ -- cgit v1.2.2 From 810191ff3087e8143b41a944fcf4fd8c693f00e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:55 -0400 Subject: Btrfs: extent_map optimizations to cut down on CPU usage Signed-off-by: Chris Mason --- 
fs/btrfs/file.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1af2b6534dad..fe28404ae7f4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,7 +289,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 16384 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; -- cgit v1.2.2 From ff79f8190b6e955ff7a71faf804a3017d526e657 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:22:25 -0400 Subject: Btrfs: Add back file data checksumming Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fe28404ae7f4..1af2b6534dad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,6 +289,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size > 16384 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; -- cgit v1.2.2 From 7936ca3883e5fef8ce5cc367a4356ad5fed67180 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Oct 2007 09:22:41 -0400 Subject: Btrfs: Default to 8k max packed tails Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1af2b6534dad..4aacf99bd97f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -289,7 +289,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 16384 || + inline_size > 8192 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; -- cgit v1.2.2 From ae2f5411c4ce7180cca8418853db50c8e52d40db Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Oct 2007 09:22:59 -0400 Subject: btrfs: 32-bit type problems An assorted set of casts to get rid of the warnings on 32-bit archs. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4aacf99bd97f..96df1b10cb60 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -188,7 +188,7 @@ insert: while (size > 0) { page = pages[i]; kaddr = kmap_atomic(page, KM_USER0); - cur_size = min(PAGE_CACHE_SIZE - page_offset, size); + cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); kunmap_atomic(kaddr, KM_USER0); page_offset = 0; -- cgit v1.2.2 From 18f16f7ba62a01c29e09b40ac6ad6d92a8955859 Mon Sep 17 00:00:00 2001 From: Yan Date: Thu, 25 Oct 2007 15:42:57 -0400 Subject: Btrfs: Fix for insert_inline_extent to handle offset != 0 This modifies inline extent size calculation, so that insert_inline_extent can handle the case that parameter 'offset' is not zero; it also a few codes to zero uninitialized area in inline extent. 
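A userspace sketch of the calculation the message describes: when a write of size bytes lands at offset inside an item whose data currently ends at found_end, the item has to grow by offset + size - found_end, and any gap between found_end and offset must be zero-filled before the new bytes are copied in. The buffer, names, and values below are made up; only the grow-and-zero-the-gap logic mirrors the change:

#include <stdio.h>
#include <string.h>
#include <assert.h>

#define INLINE_CAP 256

static unsigned char item[INLINE_CAP];	/* stands in for the inline item data */

static size_t inline_write(size_t found_end, size_t offset, size_t size,
			   const void *data)
{
	size_t new_end = offset + size;

	assert(new_end <= INLINE_CAP);
	if (new_end <= found_end)	/* overwrite entirely inside existing data */
		new_end = found_end;
	if (offset > found_end)		/* zero the uninitialized gap */
		memset(item + found_end, 0, offset - found_end);
	memcpy(item + offset, data, size);
	return new_end;			/* new inline length */
}

int main(void)
{
	size_t end = inline_write(10, 16, 4, "abcd");

	printf("inline data now %zu bytes, gap bytes are %d %d\n",
	       end, item[10], item[15]);
	return 0;
}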
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 96df1b10cb60..9260d3478aad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -108,7 +108,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - datasize = btrfs_file_extent_calc_inline_size(offset + size); ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { @@ -130,7 +129,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, } if (ret == 0) { u32 found_size; - u64 found_start; + u64 found_end; leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], @@ -144,19 +143,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, offset, inode->i_ino); goto fail; } - found_start = key.offset; found_size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); + found_end = key.offset + found_size; - if (found_size < offset + size) { + if (found_end < offset + size) { btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &key, path, - offset + size - found_size - - found_start, 1); + offset + size - found_end, 1); BUG_ON(ret != 0); ret = btrfs_extend_item(trans, root, path, - offset + size - found_size - - found_start); + offset + size - found_end); if (ret) { err = ret; goto fail; @@ -165,9 +162,15 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); } + if (found_end < offset) { + ptr = btrfs_file_extent_inline_start(ei) + found_size; + memset_extent_buffer(leaf, 0, ptr, offset - found_end); + } } else { insert: btrfs_release_path(root, path); + datasize = offset + size - key.offset; + datasize = btrfs_file_extent_calc_inline_size(datasize); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); if (ret) { @@ -181,7 +184,7 @@ insert: btrfs_set_file_extent_generation(leaf, ei, trans->transid); btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); } - ptr = btrfs_file_extent_inline_start(ei) + offset; + ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; cur_size = size; i = 0; -- cgit v1.2.2 From 2ff3e9b61d02b03e3157f7d43ba20ee1452814de Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Oct 2007 14:36:41 -0400 Subject: Add O_SYNC support to btrfs_file_write Signed-off-by: Chris Mason --- fs/btrfs/file.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9260d3478aad..843e920388ed 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -635,8 +635,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { loff_t pos; - size_t num_written = 0; - int err = 0; + loff_t start_pos; + ssize_t num_written = 0; + ssize_t err = 0; int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -652,7 +653,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pinned[1] = NULL; if (file->f_flags & O_DIRECT) return -EINVAL; + pos = *ppos; + start_pos = pos; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ 
-743,6 +747,13 @@ out: if (pinned[1]) page_cache_release(pinned[1]); *ppos = pos; + + if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + err = sync_page_range(inode, inode->i_mapping, + start_pos, num_written); + if (err < 0) + num_written = err; + } current->backing_dev_info = NULL; return num_written ? num_written : err; } -- cgit v1.2.2 From 35ebb934bd7fcc7ca991b155b7980c3c4ff9f1a5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 30 Oct 2007 16:56:53 -0400 Subject: Btrfs: Fix PAGE_CACHE_SHIFT shifts on 32 bit machines Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 843e920388ed..abdd9caad94e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -301,7 +301,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, SetPageUptodate(p); set_page_dirty(p); } - last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + last_end = (u64)(pages[num_pages -1]->index) << + PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); -- cgit v1.2.2 From 179e29e488cc74f1e9bd67bc45f70b832740e9ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Nov 2007 11:28:41 -0400 Subject: Btrfs: Fix a number of inline extent problems that Yan Zheng reported. The fixes do a number of things: 1) Most btrfs_drop_extent callers will try to leave the inline extents in place. It can truncate bytes off the beginning of the inline extent if required. 2) writepage can now update the inline extent, allowing mmap writes to go directly into the inline extent. 3) btrfs_truncate_in_transaction truncates inline extents 4) extent_map.c fixed to not merge inline extent mappings and hole mappings together Signed-off-by: Chris Mason --- fs/btrfs/file.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index abdd9caad94e..4e52f7ec1cbe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -115,8 +115,20 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, goto fail; } if (ret == 1) { + struct btrfs_key found_key; + + if (path->slots[0] == 0) + goto insert; + path->slots[0]--; leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != inode->i_ino) + goto insert; + + if (found_key.type != BTRFS_EXTENT_DATA_KEY) + goto insert; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -152,6 +164,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, offset + size - found_end, 1); BUG_ON(ret != 0); + ret = btrfs_extend_item(trans, root, path, offset + size - found_end); if (ret) { @@ -292,7 +305,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 8192 || + inline_size > 32768 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; @@ -312,7 +325,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, aligned_end = (pos + write_bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); err = btrfs_drop_extents(trans, root, inode, start_pos, - aligned_end, end_pos, &hint_byte); + aligned_end, aligned_end, &hint_byte); if (err) goto failed; err = insert_inline_extent(trans, root, 
inode, start_pos, @@ -456,13 +469,15 @@ next_slot: goto next_slot; } - /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; + if (end <= extent_end && start >= key.offset && found_inline) { + *hint_byte = EXTENT_MAP_INLINE; + } if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_bytenr = @@ -479,8 +494,10 @@ next_slot: BUG_ON(ret); } } - if (!found_inline) - bookend = 1; + bookend = 1; + if (found_inline && start <= key.offset && + inline_end < extent_end) + keep = 1; } /* truncate existing extent */ if (start > key.offset) { @@ -510,7 +527,7 @@ next_slot: new_size = btrfs_file_extent_calc_inline_size( inline_end - key.offset); btrfs_truncate_item(trans, root, path, - new_size); + new_size, 1); } } /* delete the entire extent */ @@ -551,6 +568,13 @@ next_slot: if (!bookend) continue; } + if (bookend && found_inline && start <= key.offset && + inline_end < extent_end) { + u32 new_size; + new_size = btrfs_file_extent_calc_inline_size( + extent_end - inline_end); + btrfs_truncate_item(trans, root, path, new_size, 0); + } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { struct btrfs_key ins; -- cgit v1.2.2 From dcfec0dcb1b1a037fb26177789e8f108bc429cb3 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 6 Nov 2007 10:26:26 -0500 Subject: Btrfs: Fix u32 overflow in dirty_and_release_pages. When calculating the size of inline extent, inode->i_size should also be take into consideration, otherwise sys_write may drop some data silently. You can test this bug by: #dd if=/dev/zero bs=4k count=1 of=test_file #dd if=/dev/zero bs=2k count=1 of=test_file conv=notrunc Signed-off-by: Chris Mason --- fs/btrfs/file.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4e52f7ec1cbe..bb98f52f4ea4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -239,9 +239,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; - u32 inline_size; + u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; @@ -328,9 +327,11 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, aligned_end, aligned_end, &hint_byte); if (err) goto failed; + if (isize > inline_size) + inline_size = min_t(u64, isize, aligned_end); + inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, - end_pos - start_pos, pages, 0, - num_pages); + inline_size, pages, 0, num_pages); BUG_ON(err); } if (end_pos > isize) { -- cgit v1.2.2 From a273208edd55463b3bcd8b77a6fe8ba54afc6940 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 6 Nov 2007 10:26:28 -0500 Subject: Fix EXTENT_MAP_INLINE off by one in btrfs_drop_extents Don't set hint_byte to EXTENT_MAP_INLINE when 'end == extent_end' or 'start == key.offset' . The inline extent will be truncated in these cases. 
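A small illustration of the boundary the fix tightens: the inline hint is only set when the drop range lands strictly inside the inline extent, because a range that starts at key.offset or runs to extent_end takes the truncate path instead. The helper and values are invented; only the strict comparison mirrors the patch:

#include <stdio.h>
#include <stdint.h>

/* 1 if [start, end) sits strictly inside the inline extent, else 0 */
static int strictly_inside(uint64_t start, uint64_t end,
			   uint64_t key_offset, uint64_t extent_end)
{
	return end < extent_end && start > key_offset;
}

int main(void)
{
	/* inline extent covering [0, 4096) in this made-up example */
	printf("[1024,2048): %d\n", strictly_inside(1024, 2048, 0, 4096));
	printf("[0,2048):    %d\n", strictly_inside(0, 2048, 0, 4096));    /* start == key.offset */
	printf("[1024,4096): %d\n", strictly_inside(1024, 4096, 0, 4096)); /* end == extent_end */
	return 0;
}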
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bb98f52f4ea4..5ceaed25dce1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -475,8 +475,7 @@ next_slot: search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; - - if (end <= extent_end && start >= key.offset && found_inline) { + if (end < extent_end && start > key.offset && found_inline) { *hint_byte = EXTENT_MAP_INLINE; } if (end < extent_end && end >= key.offset) { -- cgit v1.2.2 From 257d0ce36f215937602eb5c1b328b69afaf3bcd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 21:08:16 -0500 Subject: Btrfs: Allow large data extents in a single file to span into metadata block groups Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5ceaed25dce1..9dd2c5c23b56 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -439,6 +439,12 @@ next_slot: struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (extent_end) + *hint_byte = extent_end; + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; -- cgit v1.2.2 From 448d640b668dae3928591e83f2bf4ca9f4c06e52 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Nov 2007 07:52:01 -0800 Subject: Btrfs: Fine tune the btree writeback exclusion some more Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9dd2c5c23b56..ba624ae16e61 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -767,7 +767,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - btrfs_btree_balance_dirty(root, 1); + if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + btrfs_btree_balance_dirty(root, 1); cond_resched(); } mutex_unlock(&inode->i_mutex); -- cgit v1.2.2 From 00f5c795fca47d038fedd3f0c9311da3be710c9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Nov 2007 10:09:33 -0500 Subject: btrfs_drop_extents: make sure the item is getting smaller before truncate Signed-off-by: Chris Mason --- fs/btrfs/file.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ba624ae16e61..b0d637787264 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -377,23 +377,23 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 inline_end, u64 *hint_byte) + u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { - int ret; - struct btrfs_key key; + u64 extent_end = 0; + u64 search_start = start; struct extent_buffer *leaf; - int slot; struct btrfs_file_extent_item *extent; - u64 extent_end = 0; - int keep; - struct btrfs_file_extent_item old; struct btrfs_path *path; - u64 search_start = start; + struct btrfs_key key; + struct btrfs_file_extent_item old; + int keep; + int slot; int bookend; int found_type; int found_extent; int found_inline; int recow; + int 
ret; btrfs_drop_extent_cache(inode, start, end - 1); @@ -502,7 +502,7 @@ next_slot: } bookend = 1; if (found_inline && start <= key.offset && - inline_end < extent_end) + inline_limit < extent_end) keep = 1; } /* truncate existing extent */ @@ -526,12 +526,12 @@ next_slot: btrfs_set_file_extent_num_bytes(leaf, extent, new_num); btrfs_mark_buffer_dirty(leaf); - } else if (end > extent_end && - key.offset < inline_end && - inline_end < extent_end) { + } else if (key.offset < inline_limit && + (end > extent_end) && + (inline_limit < extent_end)) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - inline_end - key.offset); + inline_limit - key.offset); btrfs_truncate_item(trans, root, path, new_size, 1); } @@ -575,10 +575,10 @@ next_slot: continue; } if (bookend && found_inline && start <= key.offset && - inline_end < extent_end) { + inline_limit < extent_end && key.offset <= inline_limit) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - extent_end - inline_end); + extent_end - inline_limit); btrfs_truncate_item(trans, root, path, new_size, 0); } /* create bookend, splitting the extent in two */ -- cgit v1.2.2 From 7bb86316c3961d1bc401ef184fd996f999556c7f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: Btrfs: Add back pointers from extents to the btree or file referencing them Signed-off-by: Chris Mason --- fs/btrfs/file.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b0d637787264..5b1f90f06e03 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -496,7 +496,10 @@ next_slot: sizeof(old)); if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, disk_num_bytes); + disk_bytenr, disk_num_bytes, + root->root_key.objectid, + trans->transid, + key.objectid, end); BUG_ON(ret); } } @@ -541,6 +544,14 @@ next_slot: u64 disk_bytenr = 0; u64 disk_num_bytes = 0; u64 extent_num_bytes = 0; + u64 root_gen; + + if (leaf != root->node) { + root_gen = + btrfs_header_generation(path->nodes[1]); + } else { + root_gen = btrfs_header_generation(leaf); + } if (found_extent) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, @@ -562,8 +573,11 @@ next_slot: if (found_extent && disk_bytenr != 0) { inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_bytenr, - disk_num_bytes, 0); + disk_bytenr, + disk_num_bytes, + root->root_key.objectid, + root_gen, inode->i_ino, + key.offset, 0); } BUG_ON(ret); -- cgit v1.2.2 From d8d5f3e16d1ae4fe9b93312e083f2c04a95520f0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 12:42:00 -0500 Subject: Btrfs: Add lowest key information to back refs for extent tree blocks as well. 
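These back reference calls pass the same identifying tuple down to the extent allocator; descriptively (field names here are illustrative, not the on-disk item layout), a reference to a file data extent records roughly:

	struct file_extent_backref_sketch {
		u64 root_objectid;	/* objectid of the tree holding the reference */
		u64 generation;		/* generation of the referencing leaf */
		u64 owner_objectid;	/* inode number of the owning file */
		u64 owner_offset;	/* file offset the extent item refers to */
	};
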
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5b1f90f06e03..1cc4d285951c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -545,13 +545,10 @@ next_slot: u64 disk_num_bytes = 0; u64 extent_num_bytes = 0; u64 root_gen; + u64 root_owner; - if (leaf != root->node) { - root_gen = - btrfs_header_generation(path->nodes[1]); - } else { - root_gen = btrfs_header_generation(leaf); - } + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); if (found_extent) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, @@ -575,7 +572,7 @@ next_slot: ret = btrfs_free_extent(trans, root, disk_bytenr, disk_num_bytes, - root->root_key.objectid, + root_owner, root_gen, inode->i_ino, key.offset, 0); } -- cgit v1.2.2 From 6e3b96668eb34da6060a5189e6a73e9d62a3b178 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 14 Dec 2007 11:14:42 -0500 Subject: Off by one fix for btrfs_drop_extents One of my old patches introduces a new bug to btrfs_drop_extents(changeset 275). Inline extents are not truncated properly when "extent_end == end", it can trigger the BUG_ON at file.c:600. I hope I don't introduce new bug this time. --- Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cc4d285951c..a280b9562221 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -481,8 +481,9 @@ next_slot: search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; - if (end < extent_end && start > key.offset && found_inline) { + if (end <= extent_end && start >= key.offset && found_inline) { *hint_byte = EXTENT_MAP_INLINE; + continue; } if (end < extent_end && end >= key.offset) { if (found_extent) { -- cgit v1.2.2 From e9906a98497c26a3c1a7532b51b81cc1fbdf9e1d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Dec 2007 12:56:58 -0500 Subject: Fixes for loopback files in btrfs Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a280b9562221..62fcd79d8ab3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -861,6 +861,7 @@ struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, + .splice_read = generic_file_splice_read, .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, -- cgit v1.2.2 From 6da6abae027e2dbc59bca5f4168b0760f25068c7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:15:09 -0500 Subject: Btrfs: Back port to 2.6.18-el kernels Signed-off-by: Chris Mason --- fs/btrfs/file.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 62fcd79d8ab3..461b09663fed 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -231,7 +231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, { int err = 0; int i; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 hint_byte; @@ -652,7 +652,7 @@ static int prepare_pages(struct btrfs_root *root, { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = 
file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; @@ -666,7 +666,11 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; BUG_ON(1); } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif wait_on_page_writeback(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); @@ -682,7 +686,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ssize_t num_written = 0; ssize_t err = 0; int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; int nrptrs; @@ -707,7 +711,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = remove_suid(fdentry(file)); if (err) goto out; file_update_time(file); @@ -862,6 +866,9 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + .sendfile = generic_file_sendfile, +#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, -- cgit v1.2.2 From 1832a6d5ee3b1af61001cadba9e10da9e91af4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:21 -0500 Subject: Btrfs: Implement basic support for -ENOSPC This is intended to prevent accidentally filling the drive. A determined user can still make things oops. It includes some accounting of the current bytes under delayed allocation, but this will change as things get optimized Signed-off-by: Chris Mason --- fs/btrfs/file.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 461b09663fed..71dc2d33b6c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -307,6 +307,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > 32768 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; + u64 existing_delalloc = 0; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -316,8 +317,19 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end = (u64)(pages[num_pages -1]->index) << PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; + if (start_pos < isize) { + u64 delalloc_start = start_pos; + existing_delalloc = count_range_bits(em_tree, + &delalloc_start, + end_of_last_block, (u64)-1, + EXTENT_DELALLOC); + } set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += (end_of_last_block + 1 - + start_pos) - existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -708,12 +720,12 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) - goto out; + goto out_nolock; if (count == 0) - goto out; + goto out_nolock; err = remove_suid(fdentry(file)); if (err) - goto out; + goto out_nolock; file_update_time(file); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); @@ -758,6 
+770,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_check_free_space(root, write_bytes, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret) + goto out; + ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); @@ -787,8 +806,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_btree_balance_dirty(root, 1); cond_resched(); } - mutex_unlock(&inode->i_mutex); out: + mutex_unlock(&inode->i_mutex); +out_nolock: kfree(pages); if (pinned[0]) page_cache_release(pinned[0]); -- cgit v1.2.2 From 98ed51747b63435b9987ef12692a75c223818bbe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 10:01:48 -0500 Subject: Btrfs: Force inlining off in a few places to save stack usage Signed-off-by: Chris Mason --- fs/btrfs/file.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71dc2d33b6c6..c9ebd4fe8f7e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -80,7 +80,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } -static int insert_inline_extent(struct btrfs_trans_handle *trans, +static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 offset, size_t size, struct page **pages, size_t page_offset, @@ -221,7 +221,7 @@ fail: return err; } -static int dirty_and_release_pages(struct btrfs_trans_handle *trans, +static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, struct page **pages, @@ -653,14 +653,10 @@ out: /* * this gets pages into the page cache and locks them down */ -static int prepare_pages(struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - unsigned long first_index, - unsigned long last_index, - size_t write_bytes) +static int prepare_pages(struct btrfs_root *root, struct file *file, + struct page **pages, size_t num_pages, + loff_t pos, unsigned long first_index, + unsigned long last_index, size_t write_bytes) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; -- cgit v1.2.2 From 5b92ee7204a5fb6542b204831202adbc1a7a851a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 13:46:11 -0500 Subject: Btrfs: Fix lock ordering of the snapshot semaphore against the page lock Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c9ebd4fe8f7e..e862292bdfc6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -251,7 +251,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, num_bytes = (write_bytes + pos - start_pos + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - down_read(&BTRFS_I(inode)->root->snap_sem); end_of_last_block = start_pos + num_bytes - 1; lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); @@ -356,7 +355,6 @@ out_unlock: mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); - up_read(&BTRFS_I(inode)->root->snap_sem); return err; } @@ -726,6 +724,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pages = kmalloc(nrptrs * sizeof(struct page *), 
GFP_KERNEL); + down_read(&BTRFS_I(inode)->root->snap_sem); + mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; @@ -804,6 +804,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); + up_read(&BTRFS_I(inode)->root->snap_sem); + out_nolock: kfree(pages); if (pinned[0]) -- cgit v1.2.2 From 2552488345ea0933711010ee2c844792130a4252 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 15:44:57 -0500 Subject: Btrfs: Lower the max inline size to 8k Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e862292bdfc6..94c93373cb7d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -303,7 +303,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 32768 || + inline_size > 8192 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; u64 existing_delalloc = 0; -- cgit v1.2.2 From dc17ff8f11d129db9e83ab7244769e4eae05e14d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: Btrfs: Add data=ordered support This forces file data extents down the disk along with the metadata that references them. The current implementation is fairly simple, and just writes out all of the dirty pages in an inode before the commit. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 94c93373cb7d..0a5f4defe59b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,6 +34,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" @@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, root->fs_info->delalloc_bytes += (end_of_last_block + 1 - start_pos) - existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); + btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); - down_read(&BTRFS_I(inode)->root->snap_sem); - mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; @@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } out: mutex_unlock(&inode->i_mutex); - up_read(&BTRFS_I(inode)->root->snap_sem); out_nolock: kfree(pages); -- cgit v1.2.2 From e2008b61401ecb467a8ce1788fcd2116ae1cfbc1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: Btrfs: Add some simple throttling to wait for data=ordered and snapshot deletion Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0a5f4defe59b..897242e87fa7 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -800,6 +800,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) 
btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); cond_resched(); } out: -- cgit v1.2.2 From 5f56406aabdf5444d040c5955effc665b1d0dbaf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jan 2008 16:47:59 -0500 Subject: Btrfs: Fix hole insertion corner cases There were a few places that could cause duplicate extent insertion, this adjusts the code that creates holes to avoid it. lookup_extent_map is changed to correctly return all of the extents in a range, even when there are none matching at the start of the range. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 897242e87fa7..1cd8c908811e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -278,7 +278,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 hole_size; u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size = (end_pos - last_pos_in_file + mask) & ~mask; if (last_pos_in_file < start_pos) { err = btrfs_drop_extents(trans, root, inode, @@ -293,6 +293,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_check_file(root, inode); } if (err) goto failed; @@ -378,6 +379,80 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) return 0; } +int btrfs_check_file(struct btrfs_root *root, struct inode *inode) +{ + return 0; +#if 0 + struct btrfs_path *path; + struct btrfs_key found_key; + struct extent_buffer *leaf; + struct btrfs_file_extent_item *extent; + u64 last_offset = 0; + int nritems; + int slot; + int found_type; + int ret; + int err = 0; + u64 extent_end = 0; + + path = btrfs_alloc_path(); + ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, + last_offset, 0); + while(1) { + nritems = btrfs_header_nritems(path->nodes[0]); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + goto out; + nritems = btrfs_header_nritems(path->nodes[0]); + } + slot = path->slots[0]; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != inode->i_ino) + break; + if (found_key.type != BTRFS_EXTENT_DATA_KEY) + goto out; + + if (found_key.offset != last_offset) { + WARN_ON(1); + btrfs_print_leaf(root, leaf); + printk("inode %lu found offset %Lu expected %Lu\n", + inode->i_ino, found_key.offset, last_offset); + err = 1; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(leaf, extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = found_key.offset + + btrfs_file_extent_num_bytes(leaf, extent); + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); + extent_end = found_key.offset + + btrfs_file_extent_inline_len(leaf, item); + extent_end = (extent_end + root->sectorsize - 1) & + ~((u64)root->sectorsize -1 ); + } + last_offset = extent_end; + path->slots[0]++; + } + if (last_offset < inode->i_size) { + WARN_ON(1); + btrfs_print_leaf(root, leaf); + printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, + last_offset, inode->i_size); + err = 1; + + } +out: + btrfs_free_path(path); + return err; +#endif +} + /* * this is very complex, but the basic idea is to drop all extents * in 
the range start - end. hint_block is filled in with a block number @@ -436,6 +511,7 @@ next_slot: slot = path->slots[0]; ret = 0; btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } -- cgit v1.2.2 From d1310b2e0cd98eb1348553e69b73827b436dca7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:08 -0500 Subject: Btrfs: Split the extent_map code into two parts There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_bufers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cd8c908811e..c5bb00f92396 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - struct extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 hint_byte; u64 num_bytes; u64 start_pos; @@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_pos = pos + write_bytes; u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); - if (!em) - return -ENOMEM; - - em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); num_bytes = (write_bytes + pos - start_pos + @@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; - lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); + lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } - set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ @@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, last_pos_in_file, + last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); } if (err) @@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end += PAGE_CACHE_SIZE - 1; if (start_pos < isize) { u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(em_tree, + existing_delalloc = count_range_bits(io_tree, &delalloc_start, end_of_last_block, (u64)-1, EXTENT_DELALLOC); } - set_extent_delalloc(em_tree, start_pos, end_of_last_block, + set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); root->fs_info->delalloc_bytes += (end_of_last_block + 1 - @@ -346,6 
+342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); } if (end_pos > isize) { @@ -356,8 +353,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); - free_extent_map(em); + unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -367,10 +363,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; while(1) { + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, end); - if (!em) + if (!em) { + spin_unlock(&em_tree->lock); break; + } remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + /* once for us */ free_extent_map(em); /* once for the tree*/ -- cgit v1.2.2 From 291d673e6a22d9c6834e939f66c7cfef90669021 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 15:55:23 -0500 Subject: Btrfs: Do delalloc accounting via hooks in the extent_state code Signed-off-by: Chris Mason --- fs/btrfs/file.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c5bb00f92396..8e210616d702 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -323,10 +323,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, } set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); - spin_lock(&root->fs_info->delalloc_lock); - root->fs_info->delalloc_bytes += (end_of_last_block + 1 - - start_pos) - existing_delalloc; - spin_unlock(&root->fs_info->delalloc_lock); btrfs_add_ordered_inode(inode); } else { u64 aligned_end; -- cgit v1.2.2 From 6f568d35a045dbb8a13fe71bfc32e85e39a986cb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 16:03:38 -0500 Subject: Btrfs: mount -o max_inline=size to control the maximum inline extent size Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8e210616d702..7c10a90362ff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -301,7 +301,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 8192 || + inline_size > root->fs_info->max_inline || + (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; u64 existing_delalloc = 0; -- cgit v1.2.2 From 0181e58f91c1edbf835edb7a87d6dfe81374709d Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jan 2008 14:39:54 -0500 Subject: btrfs_drop_extent fixe for inline items > 8K When truncating a inline extent, btrfs_drop_extents doesn't properly handle the case "key.offset > inline_limit". This bug can only happen when max line size is larger than 8K. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7c10a90362ff..7cbf8d8d7e27 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -591,8 +591,7 @@ next_slot: } } bookend = 1; - if (found_inline && start <= key.offset && - inline_limit < extent_end) + if (found_inline && start <= key.offset) keep = 1; } /* truncate existing extent */ @@ -672,11 +671,10 @@ next_slot: if (!bookend) continue; } - if (bookend && found_inline && start <= key.offset && - inline_limit < extent_end && key.offset <= inline_limit) { + if (bookend && found_inline && start <= key.offset) { u32 new_size; new_size = btrfs_file_extent_calc_inline_size( - extent_end - inline_limit); + extent_end - end); btrfs_truncate_item(trans, root, path, new_size, 0); } /* create bookend, splitting the extent in two */ -- cgit v1.2.2 From dacb473ad81d5c1650cb73d1cb9b866202f6bab0 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 5 Feb 2008 09:07:49 -0500 Subject: Btrfs: Fix hole creation in file_write Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7cbf8d8d7e27..184bde1cfc3d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -265,16 +265,14 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { + if (isize < end_pos) { u64 last_pos_in_file; u64 hole_size; u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (end_pos - last_pos_in_file + mask) & ~mask; - - if (last_pos_in_file < start_pos) { + if (last_pos_in_file < end_pos) { err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, -- cgit v1.2.2 From 7261009ceaee65e4683be6e474d1ea85a1b1b26c Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 5 Feb 2008 15:40:36 -0500 Subject: btrfs_drop_extents: handle BTRFS_INODE_REF_KEY types It's possible "key.type == BTRFS_INODE_REF_KEY" and "key.offset >= end". Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 184bde1cfc3d..bfa4149c053d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -507,11 +507,12 @@ next_slot: slot = path->slots[0]; ret = 0; btrfs_item_key_to_cpu(leaf, &key, slot); - - if (key.offset >= end || key.objectid != inode->i_ino) { + if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY && + key.offset >= end) { goto out; } - if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY) { + if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || + key.objectid != inode->i_ino) { goto out; } if (recow) { -- cgit v1.2.2 From 9069218d448ea547dbad5f1cbd537e88d6519d66 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Feb 2008 13:49:28 -0500 Subject: Btrfs: Fix i_blocks accounting Now that delayed allocation accounting works, i_blocks accounting is changed to only modify i_blocks when extents inserted or removed. The fillattr call is changed to include the delayed allocation byte count in the i_blocks result. 
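The fillattr side of this change lives outside file.c; as a minimal sketch of the described behaviour (the delalloc_bytes parameter is illustrative, standing in for whatever delayed allocation counter the filesystem keeps), the reported block count becomes the on-disk i_blocks plus the bytes still under delayed allocation, expressed in 512-byte units:

	/* Sketch only; needs <linux/fs.h> and <linux/stat.h>.  Fold the
	 * outstanding delalloc byte count into the stat result. */
	static void fill_stat_blocks_sketch(struct inode *inode, u64 delalloc_bytes,
					    struct kstat *stat)
	{
		generic_fillattr(inode, stat);
		stat->blocks = inode->i_blocks + (delalloc_bytes >> 9);
	}
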
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bfa4149c053d..f89396082544 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -175,6 +175,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + inode->i_blocks += (offset + size - found_end) >> 9; } if (found_end < offset) { ptr = btrfs_file_extent_inline_start(ei) + found_size; @@ -184,6 +185,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, insert: btrfs_release_path(root, path); datasize = offset + size - key.offset; + inode->i_blocks += datasize >> 9; datasize = btrfs_file_extent_calc_inline_size(datasize); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); @@ -256,7 +258,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, goto out_unlock; } btrfs_set_trans_block_group(trans, inode); - inode->i_blocks += num_bytes >> 9; hint_byte = 0; if ((end_of_last_block & 4095) == 0) { @@ -410,7 +411,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (found_key.type != BTRFS_EXTENT_DATA_KEY) goto out; - if (found_key.offset != last_offset) { + if (found_key.offset < last_offset) { WARN_ON(1); btrfs_print_leaf(root, leaf); printk("inode %lu found offset %Lu expected %Lu\n", @@ -435,7 +436,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) last_offset = extent_end; path->slots[0]++; } - if (last_offset < inode->i_size) { + if (0 && last_offset < inode->i_size) { WARN_ON(1); btrfs_print_leaf(root, leaf); printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, @@ -608,8 +609,7 @@ next_slot: extent); if (btrfs_file_extent_disk_bytenr(leaf, extent)) { - inode->i_blocks -= - (old_num - new_num) >> 9; + dec_i_blocks(inode, old_num - new_num); } btrfs_set_file_extent_num_bytes(leaf, extent, new_num); @@ -620,6 +620,8 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( inline_limit - key.offset); + dec_i_blocks(inode, (extent_end - key.offset) - + (inline_limit - key.offset)); btrfs_truncate_item(trans, root, path, new_size, 1); } @@ -653,7 +655,7 @@ next_slot: btrfs_release_path(root, path); extent = NULL; if (found_extent && disk_bytenr != 0) { - inode->i_blocks -= extent_num_bytes >> 9; + dec_i_blocks(inode, extent_num_bytes); ret = btrfs_free_extent(trans, root, disk_bytenr, disk_num_bytes, @@ -674,6 +676,8 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( extent_end - end); + dec_i_blocks(inode, (extent_end - key.offset) - + (extent_end - end)); btrfs_truncate_item(trans, root, path, new_size, 0); } /* create bookend, splitting the extent in two */ @@ -718,6 +722,7 @@ next_slot: } out: btrfs_free_path(path); + btrfs_check_file(root, inode); return ret; } -- cgit v1.2.2 From 39b5637f6f195852259004bb27b58e2dcf9fb378 Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 15 Feb 2008 10:40:50 -0500 Subject: Btrfs: Fix "no csum found for inode" issue. A few codes were not properly updated for changes of extent map. This may be the causes of "no csum found for inode" issue. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f89396082544..b0352b5958a5 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -357,10 +357,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) { struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + u64 len = end - start + 1; + if (end == (u64)-1) + len = (u64)-1; while(1) { spin_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, start, end); + em = lookup_extent_mapping(em_tree, start, len); if (!em) { spin_unlock(&em_tree->lock); break; -- cgit v1.2.2 From 0762704b196d41941a9b439e9165efaf85c6609e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Feb 2008 11:29:24 -0500 Subject: Btrfs: Properly clear dirty and delalloc extent bits while preparing the file for write Yan Zheng noticed that we don't clear the extent state tree dirty and delalloc bits when we clear the dirty bits on the page during file write. This leads to csum errors later on. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b0352b5958a5..1a47251a9d4d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -762,6 +762,13 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } + if (start_pos < inode->i_size) { + u64 last_pos; + last_pos = (index + num_pages) << PAGE_CACHE_SHIFT; + clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, + last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, + GFP_NOFS); + } return 0; } -- cgit v1.2.2 From d99cb30a11ea117ad0f58cda09aa43438ebbf81c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Feb 2008 12:55:05 -0500 Subject: Btrfs: Take the extent lock before dropping the delalloc bits Signed-off-by: Chris Mason --- fs/btrfs/file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1a47251a9d4d..51466dcd44e0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -765,9 +765,13 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, if (start_pos < inode->i_size) { u64 last_pos; last_pos = (index + num_pages) << PAGE_CACHE_SHIFT; + lock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); + unlock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); } return 0; } -- cgit v1.2.2 From 0740c82bc7585a23e100c9a59d5e2abbf99459fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 19 Feb 2008 16:24:18 -0500 Subject: Btrfs: Properly cast before shifting Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 51466dcd44e0..5fa450452f9b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -764,7 +764,7 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, } if (start_pos < inode->i_size) { u64 last_pos; - last_pos = (index + num_pages) << PAGE_CACHE_SHIFT; + last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, -- cgit v1.2.2 
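Both PAGE_CACHE_SHIFT fixes in this series address the same 32-bit pitfall: the page index is an unsigned long, so the shift is evaluated in 32 bits and wraps for byte offsets at or beyond 4GB unless the index is widened first (with 4K pages, PAGE_CACHE_SHIFT is 12). A small sketch of the safe form:

	/* Illustration: widen before shifting so byte offsets past 4GB survive
	 * on 32-bit machines, where unsigned long (and pgoff_t) is 32 bits. */
	static u64 page_index_to_offset(unsigned long index)
	{
		return (u64)index << PAGE_CACHE_SHIFT;
	}
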
From 16432985920f3c45af82da214e2498f3e2f9066b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Apr 2008 10:23:21 -0400 Subject: Btrfs: Add O_DIRECT read and write (writes == buffered + cache flush) This adds basic O_DIRECT read and write support. In the write case, we just do a normal buffered write followed by a cache flush. O_DIRECT + O_SYNC are required to trigger metadata syncs. In the read case, there is a basic btrfs_get_block call for use by the generic O_DIRECT code. This does honor multi-volume mapping rules but it skips all checksumming. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5fa450452f9b..5b9386452218 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -796,8 +796,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, PAGE_CACHE_SIZE / (sizeof(struct page *))); pinned[0] = NULL; pinned[1] = NULL; - if (file->f_flags & O_DIRECT) - return -EINVAL; pos = *ppos; start_pos = pos; @@ -909,6 +907,15 @@ out_nolock: start_pos, num_written); if (err < 0) num_written = err; + } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { + do_sync_mapping_range(inode->i_mapping, start_pos, + start_pos + num_written - 1, + SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); + + invalidate_mapping_pages(inode->i_mapping, + start_pos >> PAGE_CACHE_SHIFT, + (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; return num_written ? num_written : err; -- cgit v1.2.2 From b248a4152956cbae1b23f4c70ef5b51d6ea2ecfb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Apr 2008 09:48:18 -0400 Subject: Btrfs: A few updates for 2.6.18 and versions older than 2.6.25 This includes fixing a missing spinlock init call that caused oops on mount for most kernels other than 2.6.25. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5b9386452218..9fbda6552069 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -908,11 +908,17 @@ out_nolock: if (err < 0) num_written = err; } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + do_sync_file_range(file, start_pos, + start_pos + num_written - 1, + SYNC_FILE_RANGE_WRITE | + SYNC_FILE_RANGE_WAIT_AFTER); +#else do_sync_mapping_range(inode->i_mapping, start_pos, start_pos + num_written - 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER); - +#endif invalidate_mapping_pages(inode->i_mapping, start_pos >> PAGE_CACHE_SHIFT, (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); -- cgit v1.2.2 From 3b951516ed703af0f6d82053937655ad69b60864 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Apr 2008 11:29:12 -0400 Subject: Btrfs: Use the extent map cache to find the logical disk block during data retries The data read retry code needs to find the logical disk block before it can resubmit new bios. But, finding this block isn't allowed to take the fs_mutex because that will deadlock with a number of different callers. This changes the retry code to use the extent map cache instead, but that requires the extent map cache to have the extent we're looking for. This is a problem because btrfs_drop_extent_cache just drops the entire extent instead of the little tiny part it is invalidating. 
The bulk of the code in this patch changes btrfs_drop_extent_cache to invalidate only a portion of the extent cache, and changes btrfs_get_extent to deal with the results. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9fbda6552069..3f5525f0834c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -356,12 +356,23 @@ out_unlock: int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) { struct extent_map *em; + struct extent_map *split = NULL; + struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 len = end - start + 1; + int ret; + int testend = 1; - if (end == (u64)-1) + if (end == (u64)-1) { len = (u64)-1; + testend = 0; + } while(1) { + if (!split) + split = alloc_extent_map(GFP_NOFS); + if (!split2) + split2 = alloc_extent_map(GFP_NOFS); + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (!em) { @@ -369,6 +380,36 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) break; } remove_extent_mapping(em_tree, em); + + if (em->block_start < EXTENT_MAP_LAST_BYTE && + em->start < start) { + split->start = em->start; + split->len = start - em->start; + split->block_start = em->block_start; + split->bdev = em->bdev; + split->flags = em->flags; + ret = add_extent_mapping(em_tree, split); + BUG_ON(ret); + free_extent_map(split); + split = split2; + split2 = NULL; + } + if (em->block_start < EXTENT_MAP_LAST_BYTE && + testend && em->start + em->len > start + len) { + u64 diff = start + len - em->start; + + split->start = start + len; + split->len = em->start + em->len - (start + len); + split->bdev = em->bdev; + split->flags = em->flags; + + split->block_start = em->block_start + diff; + + ret = add_extent_mapping(em_tree, split); + BUG_ON(ret); + free_extent_map(split); + split = NULL; + } spin_unlock(&em_tree->lock); /* once for us */ @@ -376,6 +417,10 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) /* once for the tree*/ free_extent_map(em); } + if (split) + free_extent_map(split); + if (split2) + free_extent_map(split2); return 0; } -- cgit v1.2.2 From 409c6118d39cb2d8666bee3d61a1a9ae5bbd4f5d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 09:24:20 -0400 Subject: Btrfs: Set nodatasum on the inode when written by a nodatasum mount Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3f5525f0834c..5d537f26dc83 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -863,6 +863,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; + /* + * if this is a nodatasum mount, force summing off for the inode + * all the time. 
That way a later mount with summing on won't + * get confused + */ + if (btrfs_test_opt(root, NODATASUM)) + btrfs_set_flag(inode, NODATASUM); + /* * there are lots of better ways to do this, but this code * makes sure the first and last page in the file range are -- cgit v1.2.2 From 81d7ed29ff6bdec903c36c26b386e16c014993b2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 08:51:48 -0400 Subject: Btrfs: Throttle file_write when data=ordered is flushing the inode Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5d537f26dc83..8effdf4f5d6f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -977,6 +977,7 @@ out_nolock: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; + btrfs_ordered_throttle(root, inode); return num_written ? num_written : err; } -- cgit v1.2.2 From f2eb0a241f0e5c135d93243b0236cb1f14c305e0 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 2 May 2008 14:43:14 -0400 Subject: Btrfs: Clone file data ioctl Add a new ioctl to clone file data Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8effdf4f5d6f..a50507f3056d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -285,7 +285,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, - 0, 0, hole_size); + 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); -- cgit v1.2.2 From bb8885cc0aa6b1606143d92c70762b16026016a5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 May 2008 14:49:33 -0400 Subject: Btrfs: Fix do_sync_file_range ifdefs (2.6.22) Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a50507f3056d..c89c3ac4b23b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -961,7 +961,7 @@ out_nolock: if (err < 0) num_written = err; } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) do_sync_file_range(file, start_pos, start_pos + num_written - 1, SYNC_FILE_RANGE_WRITE | -- cgit v1.2.2 From 12fa8ec64f445aa932ba154053fe95432f30f2c6 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 2 May 2008 15:03:58 -0400 Subject: Btrfs: Add workaround for AppArmor changing remove_suid() In openSUSE 10.3, AppArmor modifies remove_suid to take a struct path rather than just a dentry. This patch tests that the kernel is openSUSE 10.3 or newer and adjusts the call accordingly. Debian/Ubuntu with AppArmor applied will also need a similar patch. Maintainers of btrfs under those distributions should build on this patch or, alternatively, alter their package descriptions to add -DREMOVE_SUID_PATH to the compiler command line. Signed-off-by: Jeff Mahoney - --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ b/compat.h 2008-02-06 16:46:13.000000000 -0500 @@ -0,0 +1,15 @@ +#ifndef _COMPAT_H_ +#define _COMPAT_H_ + + +/* + * Even if AppArmor isn't enabled, it still has different prototypes. + * Add more distro/version pairs here to declare which has AppArmor applied. 
+ */ +#if defined(CONFIG_SUSE_KERNEL) +# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) +# define REMOVE_SUID_PATH 1 +# endif +#endif + +#endif /* _COMPAT_H_ */ - --- a/file.c 2008-02-06 11:37:39.000000000 -0500 +++ b/file.c 2008-02-06 16:46:23.000000000 -0500 @@ -37,6 +37,7 @@ #include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" +#include "compat.h" static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, @@ -790,7 +791,11 @@ static ssize_t btrfs_file_write(struct f goto out_nolock; if (count == 0) goto out_nolock; +#ifdef REMOVE_SUID_PATH + err = remove_suid(&file->f_path); +#else err = remove_suid(fdentry(file)); +#endif if (err) goto out_nolock; file_update_time(file); Signed-off-by: Chris Mason --- fs/btrfs/file.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c89c3ac4b23b..df97d470812a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -37,6 +37,7 @@ #include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" +#include "compat.h" static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, @@ -852,7 +853,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out_nolock; if (count == 0) goto out_nolock; +#ifdef REMOVE_SUID_PATH + err = remove_suid(&file->f_path); +#else err = remove_suid(fdentry(file)); +#endif if (err) goto out_nolock; file_update_time(file); -- cgit v1.2.2 From 211c17f51f46dc6c308c742098273dd46b5ca59c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 May 2008 09:13:45 -0400 Subject: Fix corners in writepage and btrfs_truncate_page The extent_io writepage calls needed an extra check for discarding pages that started on th last byte in the file. btrfs_truncate_page needed checks to make sure the page was still part of the file after reading it, and most importantly, needed to wait for all IO to the page to finish before freeing the corresponding extents on disk. 
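A minimal sketch of the ordering described for btrfs_truncate_page, using the standard page-cache idioms (this shows the shape of the checks, not the actual change, which lands outside file.c): re-validate that the page still belongs to the file after a read may have slept, and wait for in-flight writeback before the backing extents can be dropped.

	/* Sketch only: re-validate and quiesce a page before freeing its extents. */
	static int truncate_page_checks_sketch(struct page *page, struct inode *inode)
	{
		if (page->mapping != inode->i_mapping)
			return -EAGAIN;		/* page left the file while we slept */
		wait_on_page_writeback(page);	/* let all IO to the page finish first */
		return 0;
	}
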
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index df97d470812a..d968900c6973 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -305,7 +305,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; - u64 existing_delalloc = 0; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -315,13 +314,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end = (u64)(pages[num_pages -1]->index) << PAGE_CACHE_SHIFT; last_end += PAGE_CACHE_SIZE - 1; - if (start_pos < isize) { - u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(io_tree, - &delalloc_start, - end_of_last_block, (u64)-1, - EXTENT_DELALLOC); - } set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); btrfs_add_ordered_inode(inode); -- cgit v1.2.2 From e1b81e6761bd8419146d4bbe0aadd8b2d348a01c Mon Sep 17 00:00:00 2001 From: Mingming Date: Tue, 27 May 2008 10:55:43 -0400 Subject: btrfs delete ordered inode handling fix Use btrfs_release_file instead of a put_inode call Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d968900c6973..c4fa4664a456 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -978,6 +978,12 @@ out_nolock: return num_written ? num_written : err; } +static int btrfs_release_file (struct inode * inode, struct file * filp) +{ + btrfs_del_ordered_inode(inode); + return 0; +} + static int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { @@ -1044,6 +1050,7 @@ struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, + .release = btrfs_release_file, .fsync = btrfs_sync_file, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT -- cgit v1.2.2 From 6bf13c0cc833bf5ba013d6aa60379484bf48c4e6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jun 2008 10:07:39 -0400 Subject: Btrfs: transaction ioctls These ioctls let a user application hold a transaction open while it performs a series of operations. A final ioctl does a sync on the fs (closing the current transaction). This is the main requirement for Ceph's OSD to be able to keep the data it's storing in a btrfs volume consistent, and AFAICS it works just fine. The application would do something like fd = ::open("some/file", O_RDONLY); ::ioctl(fd, BTRFS_IOC_TRANS_START); /* do a bunch of stuff */ ::ioctl(fd, BTRFS_IOC_TRANS_END); or just ::close(fd); And to ensure it commits to disk, ::ioctl(fd, BTRFS_IOC_SYNC); When a transaction is held open, the trans_handle is attached to the struct file (via private_data) so that it will get cleaned up if the process dies unexpectedly. A held transaction is also ended on fsync() to avoid a deadlock. A misbehaving application could also deliberately hold a transaction open, effectively locking up the FS, so it may make sense to restrict something like this to root or something. 
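Fleshing the usage pattern in this message out into a user-space example (illustrative: it assumes the BTRFS_IOC_TRANS_START/TRANS_END/SYNC definitions from btrfs's ioctl.h are visible to the application, and that the path lives on a btrfs mount):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include "ioctl.h"	/* btrfs ioctl definitions (BTRFS_IOC_*) */

	static int update_object_atomically(const char *path)
	{
		int fd = open(path, O_RDONLY);
		if (fd < 0)
			return -1;
		if (ioctl(fd, BTRFS_IOC_TRANS_START) < 0) {
			close(fd);
			return -1;
		}
		/* ... do a bunch of stuff that must land in one transaction ... */
		ioctl(fd, BTRFS_IOC_TRANS_END);	/* or just close(fd) */
		ioctl(fd, BTRFS_IOC_SYNC);	/* make sure it commits to disk */
		close(fd);
		return 0;
	}
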
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c4fa4664a456..73c6d085bd90 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -978,9 +978,11 @@ out_nolock: return num_written ? num_written : err; } -static int btrfs_release_file (struct inode * inode, struct file * filp) +int btrfs_release_file(struct inode * inode, struct file * filp) { btrfs_del_ordered_inode(inode); + if (filp->private_data) + btrfs_ioctl_trans_end(filp); return 0; } @@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file, /* * ok we haven't committed the transaction yet, lets do a commit */ + if (file->private_data) + btrfs_ioctl_trans_end(file); + trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; -- cgit v1.2.2 From a213501153fd66e2359e091b1612841305ba6551 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Replace the big fs_mutex with a collection of other locks Extent alloctions are still protected by a large alloc_mutex. Objectid allocations are covered by a objectid mutex Other btree operations are protected by a lock on individual btree nodes Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 73c6d085bd90..18bbe108a0e6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -252,7 +252,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; @@ -341,7 +340,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, failed: err = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -905,9 +903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, write_bytes, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; @@ -998,9 +994,9 @@ static int btrfs_sync_file(struct file *file, * check the transaction that last modified this inode * and see if its already been committed */ - mutex_lock(&root->fs_info->fs_mutex); if (!BTRFS_I(inode)->last_trans) goto out; + mutex_lock(&root->fs_info->trans_mutex); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { @@ -1023,7 +1019,6 @@ static int btrfs_sync_file(struct file *file, } ret = btrfs_commit_transaction(trans, root); out: - mutex_unlock(&root->fs_info->fs_mutex); return ret > 0 ? EIO : ret; } -- cgit v1.2.2 From 594a24eb0e7fa8413f8b443863be4b7c72bfde9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Fix btrfs_del_ordered_inode to allow forcing the drop during unlinks This allows us to delete an unlinked inode with dirty pages from the list instead of forcing commit to write these out before deleting the inode. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 18bbe108a0e6..b7f8f92daf8a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -976,7 +976,7 @@ out_nolock: int btrfs_release_file(struct inode * inode, struct file * filp) { - btrfs_del_ordered_inode(inode); + btrfs_del_ordered_inode(inode, 0); if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; -- cgit v1.2.2 From 89ce8a63d0c761fbb02089850605360f389477d8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Add btrfs_end_transaction_throttle to force writers to wait for pending commits The existing throttle mechanism was often not sufficient to prevent new writers from coming in and making a given transaction run forever. This adds an explicit wait at the end of most operations so they will allow the current transaction to close. There is no wait inside file_write, inode updates, or cow filling, all which have different deadlock possibilities. This is a temporary measure until better asynchronous commit support is added. This code leads to stalls as it waits for data=ordered writeback, and it really needs to be fixed. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b7f8f92daf8a..ece221cba90c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -934,7 +934,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); cond_resched(); } out: -- cgit v1.2.2 From 1b1e2135dc1e4efbcf25ac9ac9979316d4e1193e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Add a per-inode csum mutex to avoid races creating csum items Signed-off-by: Chris Mason --- fs/btrfs/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ece221cba90c..8037792f8789 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -267,13 +267,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, /* FIXME...EIEIO, ENOSPC and more */ /* insert any holes we need to create */ - if (isize < end_pos) { + if (isize < start_pos) { u64 last_pos_in_file; u64 hole_size; u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; - hole_size = (end_pos - last_pos_in_file + mask) & ~mask; - if (last_pos_in_file < end_pos) { + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + if (last_pos_in_file < start_pos) { err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, -- cgit v1.2.2 From e6dcd2dc9c489108648e2ed543315dd134d50a9a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:50 -0400 Subject: Btrfs: New data=ordered implementation The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time. 
The new code changes the way data allocations and extents work: * When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated an inserted into a per-inode rbtree to track the pending extents. * As each page is written EXTENT_ORDERED is cleared on the bytes corresponding to that page. * When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, The previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 67 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8037792f8789..12e765f7e0d4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -34,7 +34,6 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "ordered-data.h" #include "ioctl.h" #include "print-tree.h" #include "compat.h" @@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - if (last_pos_in_file < start_pos) { + if (hole_size > 0) { + btrfs_wait_ordered_range(inode, last_pos_in_file, + last_pos_in_file + hole_size); err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size > root->fs_info->max_inline || (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - u64 last_end; - + /* check for reserved extents on each page, we don't want + * to reset the delalloc bit on things that already have + * extents reserved. 
+ */ + set_extent_delalloc(io_tree, start_pos, + end_of_last_block, GFP_NOFS); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); set_page_dirty(p); } - last_end = (u64)(pages[num_pages -1]->index) << - PAGE_CACHE_SHIFT; - last_end += PAGE_CACHE_SIZE - 1; - set_extent_delalloc(io_tree, start_pos, end_of_last_block, - GFP_NOFS); - btrfs_add_ordered_inode(inode); } else { u64 aligned_end; /* step one, delete the existing extents in this range */ @@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *tmp; u64 len = end - start + 1; + u64 next_start; int ret; int testend = 1; + WARN_ON(end < start); if (end == (u64)-1) { len = (u64)-1; testend = 0; @@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } + tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); + next_start = tmp->start; remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && @@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file, struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; + u64 last_pos; start_pos = pos & ~((u64)root->sectorsize - 1); + last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; memset(pages, 0, num_pages * sizeof(struct page *)); - +again: for (i = 0; i < num_pages; i++) { pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; BUG_ON(1); } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif wait_on_page_writeback(pages[i]); - set_page_extent_mapped(pages[i]); - WARN_ON(!PageLocked(pages[i])); } if (start_pos < inode->i_size) { - u64 last_pos; - last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; + struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); + ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + if (ordered && + ordered->file_offset + ordered->len > start_pos && + ordered->file_offset < last_pos) { + btrfs_put_ordered_extent(ordered); + unlock_extent(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, GFP_NOFS); + for (i = 0; i < num_pages; i++) { + unlock_page(pages[i]); + page_cache_release(pages[i]); + } + btrfs_wait_ordered_range(inode, start_pos, + last_pos - start_pos); + goto again; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); unlock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); } + for (i = 0; i < num_pages; i++) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif + set_page_extent_mapped(pages[i]); + WARN_ON(!PageLocked(pages[i])); + } return 0; } @@ -969,13 +994,11 @@ out_nolock: (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; - btrfs_ordered_throttle(root, inode); return num_written ? 
num_written : err; } int btrfs_release_file(struct inode * inode, struct file * filp) { - btrfs_del_ordered_inode(inode, 0); if (filp->private_data) btrfs_ioctl_trans_end(filp); return 0; -- cgit v1.2.2 From 247e743cbe6e655768c3679f84821e03c1577902 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:51 -0400 Subject: Btrfs: Use async helpers to deal with pages that have been improperly dirtied Higher layers sometimes call set_page_dirty without asking the filesystem to help. This causes many problems for the data=ordered and cow code. This commit detects pages that haven't been properly setup for IO and kicks off an async helper to deal with them. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 12e765f7e0d4..20928639d173 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -313,6 +313,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); + ClearPageChecked(p); set_page_dirty(p); } } else { -- cgit v1.2.2 From dbe674a99c8af088faa4c95eddaeb271a3140ab6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:05 -0400 Subject: Btrfs: Update on disk i_size only after pending ordered extents are done This changes the ordered data code to update i_size after the extent is on disk. An on disk i_size is maintained in the in-memory btrfs inode structures, and this is updated as extents finish. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 20928639d173..3e4e5c227c0c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -338,7 +338,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } failed: - err = btrfs_end_transaction(trans, root); + err = btrfs_end_transaction_throttle(trans, root); out_unlock: unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; -- cgit v1.2.2 From f9295749388f82c8d2f485e99c72cd7c7876a99b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:14 -0400 Subject: btrfs_start_transaction: wait for commits in progress to finish btrfs_commit_transaction has to loop waiting for any writers in the transaction to finish before it can proceed. btrfs_start_transaction should be polite and not join a transaction that is in the process of being finished off. There are a few places that can't wait, basically the ones doing IO that might be needed to finish the transaction. For them, btrfs_join_transaction is added. 
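Schematically, the two entry points differ only in whether the caller is willing to wait behind a commit that is already closing. The helper names below are invented to show that split; the real logic lives in transaction.c.

/* illustration only: "start" throttles behind a commit in its final stages,
 * "join" does not, because the commit may be waiting on exactly this IO */
struct sketch_trans_handle;
void sketch_wait_for_closing_commit(struct btrfs_root *root);
struct sketch_trans_handle *sketch_attach_to_transaction(struct btrfs_root *root,
                                                         int num_items);

static struct sketch_trans_handle *sketch_begin(struct btrfs_root *root,
                                                int num_items, int join)
{
        if (!join)
                sketch_wait_for_closing_commit(root);   /* invented helper */
        return sketch_attach_to_transaction(root, num_items); /* invented */
}

/* mapping onto the real entry points:
 *   btrfs_start_transaction(root, n) ~ sketch_begin(root, n, 0)
 *   btrfs_join_transaction(root, n)  ~ sketch_begin(root, n, 1)
 */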
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e4e5c227c0c..d6505892cd52 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; -- cgit v1.2.2 From ba1da2f442ec91a1534afa893f9bef7e33056ace Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:15 -0400 Subject: Btrfs: Don't pin pages in ram until the entire ordered extent is on disk. Checksum items are not inserted until the entire ordered extent is on disk, but individual pages might be clean and available for reclaim long before the whole extent is on disk. In order to allow those pages to be freed, we need to be able to search the list of ordered extents to find the checksum that is going to be inserted in the tree. This way if the page needs to be read back in before the checksums are in the btree, we'll be able to verify the checksum on the page. This commit adds the ability to search the pending ordered extents for a given offset in the file, and changes btrfs_releasepage to allow ordered pages to be freed. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d6505892cd52..3e4e5c227c0c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; -- cgit v1.2.2 From ee6e6504e147a59a9f4d582662c105e9d72ae638 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:40 -0400 Subject: Add a per-inode lock around btrfs_drop_extents btrfs_drop_extents is always called with a range lock held on the inode. But, it may operate on extents outside that range as it drops and splits them. This patch adds a per-inode mutex that is held while calling btrfs_drop_extents and while inserting new extents into the tree. It prevents races from two procs working against adjacent ranges in the tree. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3e4e5c227c0c..40ad1b2958cb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -242,6 +242,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_of_last_block; u64 end_pos = pos + write_bytes; u64 inline_size; + int did_inline = 0; loff_t isize = i_size_read(inode); start_pos = pos & ~((u64)root->sectorsize - 1); @@ -275,6 +276,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if (hole_size > 0) { btrfs_wait_ordered_range(inode, last_pos_in_file, last_pos_in_file + hole_size); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, @@ -289,6 +291,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size -1); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); btrfs_check_file(root, inode); } if (err) @@ -321,6 +324,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ aligned_end = (pos + write_bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); + mutex_lock(&BTRFS_I(inode)->extent_mutex); err = btrfs_drop_extents(trans, root, inode, start_pos, aligned_end, aligned_end, &hint_byte); if (err) @@ -332,9 +336,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size, pages, 0, num_pages); btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + did_inline = 1; } if (end_pos > isize) { i_size_write(inode, end_pos); + if (did_inline) + BTRFS_I(inode)->disk_i_size = end_pos; btrfs_update_inode(trans, root, inode); } failed: -- cgit v1.2.2 From 7f3c74fb831fa19bafe087e817c0a5ff3883f1ea Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jul 2008 12:01:11 -0400 Subject: Btrfs: Keep extent mappings in ram until pending ordered extents are done It was possible for stale mappings from disk to be used instead of the new pending ordered extent. This adds a flag to the extent map struct to keep it pinned until the pending ordered extent is actually on disk. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 40ad1b2958cb..eccdb9562ba8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -358,9 +358,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map *split = NULL; struct extent_map *split2 = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *tmp; u64 len = end - start + 1; - u64 next_start; int ret; int testend = 1; @@ -381,8 +379,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } - tmp = rb_entry(&em->rb_node, struct extent_map, rb_node); - next_start = tmp->start; + if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { + start = em->start + em->len; + free_extent_map(em); + spin_unlock(&em_tree->lock); + if (start < end) { + len = end - start + 1; + continue; + } + break; + } remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && -- cgit v1.2.2 From 4a09675279674041862d2210635b0cc1f60be28e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Jul 2008 10:29:44 -0400 Subject: Btrfs: Data ordered fixes * In btrfs_delete_inode, wait for ordered extents after calling truncate_inode_pages. This is much faster, and more correct * Properly clear our the PageChecked bit everywhere we redirty the page. * Change the writepage fixup handler to lock the page range and check to see if an ordered extent had been inserted since the improperly dirtied page was discovered * Wait for ordered extents outside the transaction. This isn't required for locking rules but does improve transaction latencies * Reduce contention on the alloc_mutex by dropping it while incrementing refs on a node/leaf and while dropping refs on a leaf. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eccdb9562ba8..591a30208acd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) for (i = 0; i < num_pages; i++) { if (!pages[i]) break; + ClearPageChecked(pages[i]); unlock_page(pages[i]); mark_page_accessed(pages[i]); page_cache_release(pages[i]); -- cgit v1.2.2 From f421950f86bf96a11fef932e167ab2e70d4c43a0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:09 -0400 Subject: Btrfs: Fix some data=ordered related data corruptions Stress testing was showing data checksum errors, most of which were caused by a lookup bug in the extent_map tree. The tree was caching the last pointer returned, and searches would check the last pointer first. But, search callers also expect the search to return the very first matching extent in the range, which wasn't always true with the last pointer usage. For now, the code to cache the last return value is just removed. It is easy to fix, but I think lookups are rare enough that it isn't required anymore. This commit also replaces do_sync_mapping_range with a local copy of the related functions. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 591a30208acd..e5ffb66ad320 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -381,14 +381,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) break; } if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - start = em->start + em->len; - free_extent_map(em); - spin_unlock(&em_tree->lock); - if (start < end) { - len = end - start + 1; - continue; - } - break; + printk(KERN_CRIT "inode %lu trying to drop pinned " + "extent start %llu end %llu, em [%llu %llu]\n", + inode->i_ino, + (unsigned long long)start, + (unsigned long long)end, + (unsigned long long)em->start, + (unsigned long long)em->len); } remove_extent_mapping(em_tree, em); -- cgit v1.2.2 From 017e5369eb353559d68a11d4a718faa634533821 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Jul 2008 15:32:51 -0400 Subject: Btrfs: Leaf reference cache update This changes the reference cache to make a single cache per root instead of one cache per transaction, and to key by the byte number of the disk block instead of the keys inside. This makes it much less likely to have cache misses if a snapshot or something has an extra reference on a higher node or a leaf while the first transaction that added the leaf into the cache is dropping. Some throttling is added to functions that free blocks heavily so they wait for old transactions to drop. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e5ffb66ad320..3efec25e34b0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -347,7 +347,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } failed: - err = btrfs_end_transaction_throttle(trans, root); + err = btrfs_end_transaction(trans, root); out_unlock: unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; -- cgit v1.2.2 From ab78c84de1ce4db1b2a2cef361625ad80abbab3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jul 2008 16:15:18 -0400 Subject: Btrfs: Throttle operations if the reference cache gets too large A large reference cache is directly related to a lot of work pending for the cleaner thread. This throttles back new operations based on the size of the reference cache so the cleaner thread will be able to keep up. Overall, this actually makes the FS faster because the cleaner thread will be more likely to find things in cache. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3efec25e34b0..ded5281f8463 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -974,6 +974,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); cond_resched(); } out: -- cgit v1.2.2 From bcc63abbf3e9bf948a1b0129b3e6120ec7d7f698 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jul 2008 16:29:20 -0400 Subject: Btrfs: implement memory reclaim for leaf reference cache The memory reclaiming issue happens when snapshot exists. 
In that case, some cache entries may not be used during old snapshot dropping, so they will remain in the cache until umount. The patch adds a field to struct btrfs_leaf_ref to record create time. Besides, the patch makes all dead roots of a given snapshot linked together in order of create time. After a old snapshot was completely dropped, we check the dead root list and remove all cache entries created before the oldest dead root in the list. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ded5281f8463..412ab4a26382 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1095,4 +1095,3 @@ struct file_operations btrfs_file_operations = { .compat_ioctl = btrfs_ioctl, #endif }; - -- cgit v1.2.2 From 0ee0fda06b943d7ef65f3cec50ab9d427a2baf97 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Wed, 30 Jul 2008 16:54:26 -0400 Subject: Btrfs: Add compatibility for kernels >= 2.6.27-rc1 Add a couple of #if's to follow API changes. Signed-off-by: Sven Wegener Signed-off-by: Chris Mason --- fs/btrfs/file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 412ab4a26382..388ac397c2c8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -886,7 +886,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, #ifdef REMOVE_SUID_PATH err = remove_suid(&file->f_path); #else +# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + err = file_remove_suid(file); +# else err = remove_suid(fdentry(file)); +# endif #endif if (err) goto out_nolock; -- cgit v1.2.2 From 37d1aeee3990385e9bb436c50c2f7e120a668df6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 10:48:37 -0400 Subject: Btrfs: Throttle tuning This avoids waiting for transactions with pages locked by breaking out the code to wait for the current transaction to close into a function called by btrfs_throttle. It also lowers the limits for where we start throttling. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 388ac397c2c8..d3f2fe0b7c6c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -253,7 +253,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); if (!trans) { err = -ENOMEM; goto out_unlock; -- cgit v1.2.2 From 3ce7e67a069b919be774a341b82fc20978b7f69d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 15:42:54 -0400 Subject: Btrfs: Drop some debugging around the extent_map pinned flag Signed-off-by: Chris Mason --- fs/btrfs/file.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3f2fe0b7c6c..c78f184ee5cc 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -380,15 +380,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } - if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - printk(KERN_CRIT "inode %lu trying to drop pinned " - "extent start %llu end %llu, em [%llu %llu]\n", - inode->i_ino, - (unsigned long long)start, - (unsigned long long)end, - (unsigned long long)em->start, - (unsigned long long)em->len); - } + clear_bit(EXTENT_FLAG_PINNED, &em->flags); remove_extent_mapping(em_tree, em); if (em->block_start < EXTENT_MAP_LAST_BYTE && -- cgit v1.2.2 From f87f057b49ee52cf5c627ab27a706e3252767c9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 11:27:23 -0400 Subject: Btrfs: Improve and cleanup locking done by walk_down_tree While dropping snapshots, walk_down_tree does most of the work of checking reference counts and limiting tree traversal to just the blocks that we are freeing. It dropped and held the allocation mutex in strange and confusing ways, this commit changes it to only hold the mutex while actually freeing a block. The rest of the checks around reference counts should be safe without the lock because we only allow one process in btrfs_drop_snapshot at a time. Other processes dropping reference counts should not drop it to 1 because their tree roots already have an extra ref on the block. 
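Reduced to a sketch, the new pattern checks reference counts without the mutex and only takes alloc_mutex around the actual free. The two helpers here are placeholders for the extent-tree.c routines; only the alloc_mutex and the single-dropper assumption come from the changelog.

/* sketch only: safe because just one process runs btrfs_drop_snapshot */
int sketch_lookup_refs(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
                       u32 *refs);
int sketch_free_block(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root, u64 bytenr, u64 num_bytes);

static int sketch_drop_block(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 bytenr, u64 num_bytes)
{
        u32 refs;
        int ret;

        ret = sketch_lookup_refs(root, bytenr, num_bytes, &refs); /* unlocked */
        if (ret)
                return ret;
        if (refs > 1)
                return 0;       /* still referenced elsewhere, nothing to free */

        mutex_lock(&root->fs_info->alloc_mutex);
        ret = sketch_free_block(trans, root, bytenr, num_bytes); /* placeholder */
        mutex_unlock(&root->fs_info->alloc_mutex);
        return ret;
}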
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c78f184ee5cc..8915f2dc1bce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); mutex_unlock(&BTRFS_I(inode)->extent_mutex); + + /* + * an ugly way to do all the prop accounting around + * the page bits and mapping tags + */ + set_page_writeback(pages[0]); + end_page_writeback(pages[0]); did_inline = 1; } if (end_pos > isize) { @@ -833,11 +840,7 @@ again: start_pos, last_pos - 1, GFP_NOFS); } for (i = 0; i < num_pages; i++) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif + clear_page_dirty_for_io(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } -- cgit v1.2.2 From ea8c281947950fac5f78818b767821d696c9512a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 23:17:27 -0400 Subject: Btrfs: Maintain a list of inodes that are delalloc and a way to wait on them Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8915f2dc1bce..eb8e4556fa71 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -312,8 +312,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, * to reset the delalloc bit on things that already have * extents reserved. */ - set_extent_delalloc(io_tree, start_pos, - end_of_last_block, GFP_NOFS); + btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); -- cgit v1.2.2 From a1b32a5932cfac7c38b442582285f3da2a09dfd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:09:51 -0400 Subject: Btrfs: Add debugging checks to track down corrupted metadata Signed-off-by: Chris Mason --- fs/btrfs/file.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb8e4556fa71..e9e86fbaa243 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -39,9 +39,10 @@ #include "compat.h" -static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, - struct page **prepared_pages, - const char __user * buf) +static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, + int write_bytes, + struct page **prepared_pages, + const char __user * buf) { long page_fault = 0; int i; @@ -69,7 +70,7 @@ static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, return page_fault ? -EFAULT : 0; } -static void btrfs_drop_pages(struct page **pages, size_t num_pages) +static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -359,7 +360,7 @@ out_unlock: return err; } -int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) +int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) { struct extent_map *em; struct extent_map *split = NULL; @@ -515,7 +516,7 @@ out: * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. 
*/ -int btrfs_drop_extents(struct btrfs_trans_handle *trans, +int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { @@ -785,7 +786,7 @@ out: /* * this gets pages into the page cache and locks them down */ -static int prepare_pages(struct btrfs_root *root, struct file *file, +static int noinline prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, unsigned long last_index, size_t write_bytes) -- cgit v1.2.2 From e02119d5a7b4396c5a872582fddc8bd6d305a70a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:13:11 -0400 Subject: Btrfs: Add a write ahead tree log to optimize synchronous operations File syncs and directory syncs are optimized by copying their items into a special (copy-on-write) log tree. There is one log tree per subvolume and the btrfs super block points to a tree of log tree roots. After a crash, items are copied out of the log tree and back into the subvolume. See tree-log.c for all the details. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e9e86fbaa243..84ecf3ab8511 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -36,6 +36,8 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "tree-log.h" +#include "locking.h" #include "compat.h" @@ -988,10 +990,27 @@ out_nolock: *ppos = pos; if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = sync_page_range(inode, inode->i_mapping, - start_pos, num_written); + struct btrfs_trans_handle *trans; + + err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, + start_pos + num_written -1, + WB_SYNC_NONE); + if (err < 0) + num_written = err; + + err = btrfs_wait_on_page_writeback_range(inode->i_mapping, + start_pos, start_pos + num_written - 1); if (err < 0) num_written = err; + + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) do_sync_file_range(file, start_pos, @@ -1019,8 +1038,7 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } -static int btrfs_sync_file(struct file *file, - struct dentry *dentry, int datasync) +int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1043,6 +1061,8 @@ static int btrfs_sync_file(struct file *file, } mutex_unlock(&root->fs_info->trans_mutex); + filemap_fdatawait(inode->i_mapping); + /* * ok we haven't committed the transaction yet, lets do a commit */ @@ -1054,7 +1074,16 @@ static int btrfs_sync_file(struct file *file, ret = -ENOMEM; goto out; } - ret = btrfs_commit_transaction(trans, root); + + ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); + if (ret < 0) + goto out; + if (ret > 0) { + ret = btrfs_commit_transaction(trans, root); + } else { + btrfs_sync_log(trans, root); + ret = btrfs_end_transaction(trans, root); + } out: return ret > 0 ? 
EIO : ret; } -- cgit v1.2.2 From 49eb7e46d47ea72a9bd2a5f8cedb04f5159cc277 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:53:12 -0400 Subject: Btrfs: Dir fsync optimizations Drop i_mutex during the commit Don't bother doing the fsync at all unless the dir is marked as dirtied and needing fsync in this transaction. For directories, this means that someone has unlinked a file from the dir without fsyncing the file. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 84ecf3ab8511..58b329ddb426 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1061,7 +1061,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_unlock(&root->fs_info->trans_mutex); + root->fs_info->tree_log_batch++; filemap_fdatawait(inode->i_mapping); + root->fs_info->tree_log_batch++; /* * ok we haven't committed the transaction yet, lets do a commit @@ -1076,14 +1078,29 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret < 0) + if (ret < 0) { goto out; + } + + /* we've logged all the items and now have a consistent + * version of the file in the log. It is possible that + * someone will come in and modify the file, but that's + * fine because the log is consistent on disk, and we + * have references to all of the file's extents + * + * It is possible that someone will come in and log the + * file again, but that will end up using the synchronization + * inside btrfs_sync_log to keep things safe. + */ + mutex_unlock(&file->f_dentry->d_inode->i_mutex); + if (ret > 0) { ret = btrfs_commit_transaction(trans, root); } else { btrfs_sync_log(trans, root); ret = btrfs_end_transaction(trans, root); } + mutex_lock(&file->f_dentry->d_inode->i_mutex); out: return ret > 0 ? EIO : ret; } -- cgit v1.2.2 From 31840ae1a6b433ca0e6a8d341756ff478bbf959e Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Tue, 23 Sep 2008 13:14:14 -0400 Subject: Btrfs: Full back reference support This patch makes the back reference system to explicit record the location of parent node for all types of extents. The location of parent node is placed into the offset field of backref key. Every time a tree block is balanced, the back references for the affected lower level extents are updated. 
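The practical effect shows up in the btrfs_inc_extent_ref calls in the hunks below: the caller now names the block that holds the reference. Here is that call again with comments added; the argument meanings are inferred from this diff, not stated authoritatively.

        ret = btrfs_inc_extent_ref(trans, root,
                        disk_bytenr,                     /* extent being referenced */
                        le64_to_cpu(old.disk_num_bytes), /* its full on-disk length */
                        leaf->start,                     /* parent: the leaf that will
                                                          * hold the file extent item */
                        root->root_key.objectid,         /* tree that owns that leaf */
                        trans->transid,                  /* generation of the reference */
                        ins.objectid, ins.offset);       /* inode and file offset */
        BUG_ON(ret);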
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 121 ++++++++++++++++++++++++++------------------------------ 1 file changed, 56 insertions(+), 65 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58b329ddb426..48a702d41c8c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -524,6 +524,9 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, { u64 extent_end = 0; u64 search_start = start; + u64 leaf_start; + u64 root_gen; + u64 root_owner; struct extent_buffer *leaf; struct btrfs_file_extent_item *extent; struct btrfs_path *path; @@ -562,6 +565,9 @@ next_slot: bookend = 0; found_extent = 0; found_inline = 0; + leaf_start = 0; + root_gen = 0; + root_owner = 0; extent = NULL; leaf = path->nodes[0]; slot = path->slots[0]; @@ -628,27 +634,18 @@ next_slot: search_start = extent_end; if (end <= extent_end && start >= key.offset && found_inline) { *hint_byte = EXTENT_MAP_INLINE; - continue; + goto out; + } + + if (found_extent) { + read_extent_buffer(leaf, &old, (unsigned long)extent, + sizeof(old)); + root_gen = btrfs_header_generation(leaf); + root_owner = btrfs_header_owner(leaf); + leaf_start = leaf->start; } + if (end < extent_end && end >= key.offset) { - if (found_extent) { - u64 disk_bytenr = - btrfs_file_extent_disk_bytenr(leaf, extent); - u64 disk_num_bytes = - btrfs_file_extent_disk_num_bytes(leaf, - extent); - read_extent_buffer(leaf, &old, - (unsigned long)extent, - sizeof(old)); - if (disk_bytenr != 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, disk_num_bytes, - root->root_key.objectid, - trans->transid, - key.objectid, end); - BUG_ON(ret); - } - } bookend = 1; if (found_inline && start <= key.offset) keep = 1; @@ -687,49 +684,12 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_bytenr = 0; - u64 disk_num_bytes = 0; - u64 extent_num_bytes = 0; - u64 root_gen; - u64 root_owner; - - root_gen = btrfs_header_generation(leaf); - root_owner = btrfs_header_owner(leaf); - if (found_extent) { - disk_bytenr = - btrfs_file_extent_disk_bytenr(leaf, - extent); - disk_num_bytes = - btrfs_file_extent_disk_num_bytes(leaf, - extent); - extent_num_bytes = - btrfs_file_extent_num_bytes(leaf, extent); - *hint_byte = - btrfs_file_extent_disk_bytenr(leaf, - extent); - } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); - btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_bytenr != 0) { - dec_i_blocks(inode, extent_num_bytes); - ret = btrfs_free_extent(trans, root, - disk_bytenr, - disk_num_bytes, - root_owner, - root_gen, inode->i_ino, - key.offset, 0); - } - - BUG_ON(ret); - if (!bookend && search_start >= end) { - ret = 0; - goto out; - } - if (!bookend) - continue; + btrfs_release_path(root, path); + /* the extent will be freed later */ } if (bookend && found_inline && start <= key.offset) { u32 new_size; @@ -737,10 +697,13 @@ next_slot: extent_end - end); dec_i_blocks(inode, (extent_end - key.offset) - (extent_end - end)); - btrfs_truncate_item(trans, root, path, new_size, 0); + ret = btrfs_truncate_item(trans, root, path, + new_size, 0); + BUG_ON(ret); } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { + u64 disk_bytenr; struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; @@ -748,13 +711,9 @@ next_slot: btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + BUG_ON(ret); leaf = path->nodes[0]; - if (ret) { - btrfs_print_leaf(root, 
leaf); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); - } - BUG_ON(ret); extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); write_extent_buffer(leaf, &old, @@ -770,11 +729,43 @@ next_slot: BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_bytenr) != 0) { + + disk_bytenr = le64_to_cpu(old.disk_bytenr); + if (disk_bytenr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + leaf->start, + root->root_key.objectid, + trans->transid, + ins.objectid, ins.offset); + BUG_ON(ret); + } + btrfs_release_path(root, path); + if (disk_bytenr != 0) { inode->i_blocks += btrfs_file_extent_num_bytes(leaf, extent) >> 9; } + } + + if (found_extent && !keep) { + u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); + + if (disk_bytenr != 0) { + dec_i_blocks(inode, le64_to_cpu(old.num_bytes)); + ret = btrfs_free_extent(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + leaf_start, root_owner, + root_gen, key.objectid, + key.offset, 0); + BUG_ON(ret); + *hint_byte = disk_bytenr; + } + } + + if (search_start >= end) { ret = 0; goto out; } -- cgit v1.2.2 From 2b1f55b0f0d0d1a66470ef4ea2696cd5dd741a12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 11:48:04 -0400 Subject: Remove Btrfs compat code for older kernels Btrfs had compatibility code for kernels back to 2.6.18. These have been removed, and will be maintained in a separate backport git tree from now on. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 48a702d41c8c..8856570a0ebd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -871,15 +871,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out_nolock; if (count == 0) goto out_nolock; -#ifdef REMOVE_SUID_PATH - err = remove_suid(&file->f_path); -#else -# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + err = file_remove_suid(file); -# else - err = remove_suid(fdentry(file)); -# endif -#endif if (err) goto out_nolock; file_update_time(file); @@ -1003,17 +996,10 @@ out_nolock: btrfs_commit_transaction(trans, root); } } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start_pos, - start_pos + num_written - 1, - SYNC_FILE_RANGE_WRITE | - SYNC_FILE_RANGE_WAIT_AFTER); -#else do_sync_mapping_range(inode->i_mapping, start_pos, start_pos + num_written - 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER); -#endif invalidate_mapping_pages(inode->i_mapping, start_pos >> PAGE_CACHE_SHIFT, (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); @@ -1097,12 +1083,7 @@ out: } static struct vm_operations_struct btrfs_file_vm_ops = { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - .nopage = filemap_nopage, - .populate = filemap_populate, -#else .fault = filemap_fault, -#endif .page_mkwrite = btrfs_page_mkwrite, }; @@ -1118,9 +1099,6 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - .sendfile = generic_file_sendfile, -#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, -- cgit v1.2.2 From 
5b21f2ed3f2947b5195b65c9fdbdd9e52904cc03 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:05:38 -0400 Subject: Btrfs: extent_map and data=ordered fixes for space balancing * Add an EXTENT_BOUNDARY state bit to keep the writepage code from merging data extents that are in the process of being relocated. This allows us to do accounting for them properly. * The balancing code relocates data extents indepdent of the underlying inode. The extent_map code was modified to properly account for things moving around (invalidating extent_map caches in the inode). * Don't take the drop_mutex in the create_subvol ioctl. It isn't required. * Fix walking of the ordered extent list to avoid races with sys_unlink * Change the lock ordering rules. Transaction start goes outside the drop_mutex. This allows btrfs_commit_transaction to directly drop the relocation trees. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8856570a0ebd..1b7e51a9db0f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -294,7 +294,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_pos_in_file, 0, 0, hole_size, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, - last_pos_in_file + hole_size -1); + last_pos_in_file + hole_size - 1, 0); mutex_unlock(&BTRFS_I(inode)->extent_mutex); btrfs_check_file(root, inode); } @@ -337,7 +337,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); - btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0); BUG_ON(err); mutex_unlock(&BTRFS_I(inode)->extent_mutex); @@ -362,7 +362,8 @@ out_unlock: return err; } -int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, + int skip_pinned) { struct extent_map *em; struct extent_map *split = NULL; @@ -371,6 +372,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) u64 len = end - start + 1; int ret; int testend = 1; + unsigned long flags; WARN_ON(end < start); if (end == (u64)-1) { @@ -389,6 +391,23 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) spin_unlock(&em_tree->lock); break; } + flags = em->flags; + if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { + spin_unlock(&em_tree->lock); + if (em->start <= start && + (!testend || em->start + em->len >= start + len)) { + free_extent_map(em); + break; + } + if (start < em->start) { + len = em->start - start; + } else { + len = start + len - (em->start + em->len); + start = em->start + em->len; + } + free_extent_map(em); + continue; + } clear_bit(EXTENT_FLAG_PINNED, &em->flags); remove_extent_mapping(em_tree, em); @@ -398,7 +417,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) split->len = start - em->start; split->block_start = em->block_start; split->bdev = em->bdev; - split->flags = em->flags; + split->flags = flags; ret = add_extent_mapping(em_tree, split); BUG_ON(ret); free_extent_map(split); @@ -412,7 +431,7 @@ int noinline btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) split->start = start + len; split->len = em->start + em->len - (start + len); split->bdev 
= em->bdev; - split->flags = em->flags; + split->flags = flags; split->block_start = em->block_start + diff; @@ -541,7 +560,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, int recow; int ret; - btrfs_drop_extent_cache(inode, start, end - 1); + btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); if (!path) -- cgit v1.2.2 From d352ac68148b69937d39ca5d48bcc4478e118dbf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 15:18:18 -0400 Subject: Btrfs: add and improve comments This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work todo in cleaning and commenting the code. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1b7e51a9db0f..3088a1184483 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -41,6 +41,9 @@ #include "compat.h" +/* simple helper to fault in pages and copy. This should go away + * and be replaced with calls into generic code. + */ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, @@ -72,12 +75,19 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, return page_fault ? -EFAULT : 0; } +/* + * unlocks pages after btrfs_file_write is done with them + */ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { if (!pages[i]) break; + /* page checked is some magic around finding pages that + * have been modified without going through btrfs_set_page_dirty + * clear it here + */ ClearPageChecked(pages[i]); unlock_page(pages[i]); mark_page_accessed(pages[i]); @@ -85,6 +95,10 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) } } +/* this does all the hard work for inserting an inline extent into + * the btree. Any existing inline extent is extended as required to make room, + * otherwise things are inserted as required into the btree + */ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 offset, size_t size, @@ -228,6 +242,14 @@ fail: return err; } +/* + * after copy_from_user, pages need to be dirtied and we need to make + * sure holes are created between the current EOF and the start of + * any next extents (if required). + * + * this also makes the decision about creating an inline extent vs + * doing real data extents, marking pages dirty and delalloc as required. + */ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -362,6 +384,10 @@ out_unlock: return err; } +/* + * this drops all the extents in the cache that intersect the range + * [start, end]. Existing extents are split as required. + */ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned) { @@ -536,6 +562,9 @@ out: * If an extent intersects the range but is not entirely inside the range * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. + * + * inline_limit is used to tell this code which offsets in the file to keep + * if they contain inline extents. 
*/ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -796,7 +825,9 @@ out: } /* - * this gets pages into the page cache and locks them down + * this gets pages into the page cache and locks them down, it also properly + * waits for data=ordered extents to finish before allowing the pages to be + * modified. */ static int noinline prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, @@ -1034,6 +1065,17 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } +/* + * fsync call for both files and directories. This logs the inode into + * the tree log instead of forcing full commits whenever possible. + * + * It needs to call filemap_fdatawait so that all ordered extent updates are + * in the metadata btree are up to date for copying to the log. + * + * It drops the inode mutex before doing the tree log commit. This is an + * important optimization for directories because holding the mutex prevents + * new operations on the dir while we write to disk. + */ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; -- cgit v1.2.2 From cb843a6f513a1a91c54951005e60bd9b95bdf973 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Oct 2008 12:30:02 -0400 Subject: Btrfs: O_DIRECT writes via buffered writes + invaldiate This reworks the btrfs O_DIRECT write code a bit. It had always fallen back to buffered IO and done an invalidate, but needed to be updated for the data=ordered code. The invalidate wasn't actually removing pages because they were still inside an ordered extent. This also combines the O_DIRECT/O_SYNC paths where possible, and kicks off IO in the main btrfs_file_write loop to keep the pipe down the the disk full as we process long writes. 
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 64 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 29 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3088a1184483..a03d1bbb19ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct page *pinned[2]; unsigned long first_index; unsigned long last_index; + int will_write; + + will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || + (file->f_flags & O_DIRECT)); nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / (sizeof(struct page *))); @@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (ret) goto out; + if (will_write) { + btrfs_fdatawrite_range(inode->i_mapping, pos, + pos + write_bytes - 1, + WB_SYNC_NONE); + } else { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + num_pages); + if (num_pages < + (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); + } + buf += write_bytes; count -= write_bytes; pos += write_bytes; num_written += write_bytes; - balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) - btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); cond_resched(); } out: @@ -1023,36 +1036,29 @@ out_nolock: page_cache_release(pinned[1]); *ppos = pos; - if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + if (num_written > 0 && will_write) { struct btrfs_trans_handle *trans; - err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, - start_pos + num_written -1, - WB_SYNC_NONE); - if (err < 0) - num_written = err; - - err = btrfs_wait_on_page_writeback_range(inode->i_mapping, - start_pos, start_pos + num_written - 1); - if (err < 0) + err = btrfs_wait_ordered_range(inode, start_pos, num_written); + if (err) num_written = err; - trans = btrfs_start_transaction(root, 1); - ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret == 0) { - btrfs_sync_log(trans, root); - btrfs_end_transaction(trans, root); - } else { - btrfs_commit_transaction(trans, root); + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, + file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } + } + if (file->f_flags & O_DIRECT) { + invalidate_mapping_pages(inode->i_mapping, + start_pos >> PAGE_CACHE_SHIFT, + (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } - } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { - do_sync_mapping_range(inode->i_mapping, start_pos, - start_pos + num_written - 1, - SYNC_FILE_RANGE_WRITE | - SYNC_FILE_RANGE_WAIT_AFTER); - invalidate_mapping_pages(inode->i_mapping, - start_pos >> PAGE_CACHE_SHIFT, - (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; return num_written ? num_written : err; -- cgit v1.2.2 From a76a3cd40c1127ca199d4f7f37bf0d541bf44eb2 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:29 -0400 Subject: Btrfs: Count space allocated to file in bytes This patch makes btrfs count space allocated to file in bytes instead of 512 byte sectors. 
Everything else in btrfs uses a byte count instead of sector sizes or blocks sizes, so this fits better. Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a03d1bbb19ad..18dfdf5f91d1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -193,7 +193,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - inode->i_blocks += (offset + size - found_end) >> 9; + inode_add_bytes(inode, offset + size - found_end); } if (found_end < offset) { ptr = btrfs_file_extent_inline_start(ei) + found_size; @@ -203,7 +203,7 @@ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, insert: btrfs_release_path(root, path); datasize = offset + size - key.offset; - inode->i_blocks += datasize >> 9; + inode_add_bytes(inode, datasize); datasize = btrfs_file_extent_calc_inline_size(datasize); ret = btrfs_insert_empty_item(trans, root, path, &key, datasize); @@ -713,7 +713,8 @@ next_slot: extent); if (btrfs_file_extent_disk_bytenr(leaf, extent)) { - dec_i_blocks(inode, old_num - new_num); + inode_sub_bytes(inode, old_num - + new_num); } btrfs_set_file_extent_num_bytes(leaf, extent, new_num); @@ -724,14 +725,17 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( inline_limit - key.offset); - dec_i_blocks(inode, (extent_end - key.offset) - - (inline_limit - key.offset)); + inode_sub_bytes(inode, extent_end - + inline_limit); btrfs_truncate_item(trans, root, path, new_size, 1); } } /* delete the entire extent */ if (!keep) { + if (found_inline) + inode_sub_bytes(inode, extent_end - + key.offset); ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); @@ -743,8 +747,7 @@ next_slot: u32 new_size; new_size = btrfs_file_extent_calc_inline_size( extent_end - end); - dec_i_blocks(inode, (extent_end - key.offset) - - (extent_end - end)); + inode_sub_bytes(inode, end - key.offset); ret = btrfs_truncate_item(trans, root, path, new_size, 0); BUG_ON(ret); @@ -791,9 +794,7 @@ next_slot: } btrfs_release_path(root, path); if (disk_bytenr != 0) { - inode->i_blocks += - btrfs_file_extent_num_bytes(leaf, - extent) >> 9; + inode_add_bytes(inode, extent_end - end); } } @@ -801,7 +802,8 @@ next_slot: u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); if (disk_bytenr != 0) { - dec_i_blocks(inode, le64_to_cpu(old.num_bytes)); + inode_sub_bytes(inode, + le64_to_cpu(old.num_bytes)); ret = btrfs_free_extent(trans, root, disk_bytenr, le64_to_cpu(old.disk_num_bytes), -- cgit v1.2.2 From 3bb1a1bc42f2ae9582c28adf620484efcd4da38d Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 9 Oct 2008 11:46:24 -0400 Subject: Btrfs: Remove offset field from struct btrfs_extent_ref The offset field in struct btrfs_extent_ref records the position inside file that file extent is referenced by. In the new back reference system, tree leaves holding references to file extent are recorded explicitly. We can scan these tree leaves very quickly, so the offset field is not required. This patch also makes the back reference system check the objectid when extents are in deleting. 
Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 18dfdf5f91d1..69abbe19add2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -788,8 +788,7 @@ next_slot: le64_to_cpu(old.disk_num_bytes), leaf->start, root->root_key.objectid, - trans->transid, - ins.objectid, ins.offset); + trans->transid, ins.objectid); BUG_ON(ret); } btrfs_release_path(root, path); @@ -808,8 +807,7 @@ next_slot: disk_bytenr, le64_to_cpu(old.disk_num_bytes), leaf_start, root_owner, - root_gen, key.objectid, - key.offset, 0); + root_gen, key.objectid, 0); BUG_ON(ret); *hint_byte = disk_bytenr; } -- cgit v1.2.2 From c8b978188c9a0fd3d535c13debd19d522b726f1f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 29 Oct 2008 14:49:59 -0400 Subject: Btrfs: Add zlib compression support This is a large change for adding compression on reading and writing, both for inline and regular extents. It does some fairly large surgery to the writeback paths. Compression is off by default and enabled by mount -o compress. Even when the -o compress mount option is not used, it is possible to read compressed extents off the disk. If compression for a given set of pages fails to make them smaller, the file is flagged to avoid future compression attempts later. * While finding delalloc extents, the pages are locked before being sent down to the delalloc handler. This allows the delalloc handler to do complex things such as cleaning the pages, marking them writeback and starting IO on their behalf. * Inline extents are inserted at delalloc time now. This allows us to compress the data before inserting the inline extent, and it allows us to insert an inline extent that spans multiple pages. * All of the in-memory extent representations (extent_map.c, ordered-data.c etc) are changed to record both an in-memory size and an on disk size, as well as a flag for compression. From a disk format point of view, the extent pointers in the file are changed to record the on disk size of a given extent and some encoding flags. Space in the disk format is allocated for compression encoding, as well as encryption and a generic 'other' field. Neither the encryption or the 'other' field are currently used. In order to limit the amount of data read for a single random read in the file, the size of a compressed extent is limited to 128k. This is a software only limit, the disk format supports u64 sized compressed extents. In order to limit the ram consumed while processing extents, the uncompressed size of a compressed extent is limited to 256k. This is a software only limit and will be subject to tuning later. Checksumming is still done on compressed extents, and it is done on the uncompressed version of the data. This way additional encodings can be layered on without having to figure out which encoding to checksum. Compression happens at delalloc time, which is basically singled threaded because it is usually done by a single pdflush thread. This makes it tricky to spread the compression load across all the cpus on the box. We'll have to look at parallel pdflush walks of dirty inodes at a later time. Decompression is hooked into readpages and it does spread across CPUs nicely. 
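The "only keep compression when it actually makes the data smaller" policy can be sketched in userspace with plain zlib; this is an analogy built on compress2() and compressBound(), not the kernel's zlib workspace code, and the 128k/256k extent limits are not modelled. Build with -lz:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <zlib.h>

    /* Return a malloc'ed compressed copy of buf if it came out smaller,
     * otherwise NULL so the caller stores the data uncompressed, which
     * mirrors the "give up and flag the file" behaviour described above. */
    static unsigned char *maybe_compress(const unsigned char *buf, size_t len,
                                         size_t *out_len)
    {
            uLongf dest_len = compressBound(len);
            unsigned char *dest = malloc(dest_len);

            if (!dest)
                    return NULL;
            if (compress2(dest, &dest_len, buf, len, Z_BEST_SPEED) != Z_OK ||
                dest_len >= len) {
                    free(dest);             /* did not shrink: drop it */
                    return NULL;
            }
            *out_len = dest_len;
            return dest;
    }

    int main(void)
    {
            unsigned char data[4096];
            size_t clen = 0;
            unsigned char *c;

            memset(data, 'a', sizeof(data));        /* highly compressible */
            c = maybe_compress(data, sizeof(data), &clen);
            printf("4096 bytes -> %zu bytes (%s)\n",
                   c ? clen : sizeof(data), c ? "compressed" : "kept as-is");
            free(c);
            return 0;
    }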
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 263 +++++++++++++------------------------------------------- 1 file changed, 58 insertions(+), 205 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 69abbe19add2..0aa15436590e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -95,153 +95,6 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) } } -/* this does all the hard work for inserting an inline extent into - * the btree. Any existing inline extent is extended as required to make room, - * otherwise things are inserted as required into the btree - */ -static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - u64 offset, size_t size, - struct page **pages, size_t page_offset, - int num_pages) -{ - struct btrfs_key key; - struct btrfs_path *path; - struct extent_buffer *leaf; - char *kaddr; - unsigned long ptr; - struct btrfs_file_extent_item *ei; - struct page *page; - u32 datasize; - int err = 0; - int ret; - int i; - ssize_t cur_size; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - btrfs_set_trans_block_group(trans, inode); - - key.objectid = inode->i_ino; - key.offset = offset; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { - err = ret; - goto fail; - } - if (ret == 1) { - struct btrfs_key found_key; - - if (path->slots[0] == 0) - goto insert; - - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != inode->i_ino) - goto insert; - - if (found_key.type != BTRFS_EXTENT_DATA_KEY) - goto insert; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, ei) != - BTRFS_FILE_EXTENT_INLINE) { - goto insert; - } - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - ret = 0; - } - if (ret == 0) { - u32 found_size; - u64 found_end; - - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - if (btrfs_file_extent_type(leaf, ei) != - BTRFS_FILE_EXTENT_INLINE) { - err = ret; - btrfs_print_leaf(root, leaf); - printk("found wasn't inline offset %Lu inode %lu\n", - offset, inode->i_ino); - goto fail; - } - found_size = btrfs_file_extent_inline_len(leaf, - btrfs_item_nr(leaf, path->slots[0])); - found_end = key.offset + found_size; - - if (found_end < offset + size) { - btrfs_release_path(root, path); - ret = btrfs_search_slot(trans, root, &key, path, - offset + size - found_end, 1); - BUG_ON(ret != 0); - - ret = btrfs_extend_item(trans, root, path, - offset + size - found_end); - if (ret) { - err = ret; - goto fail; - } - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - inode_add_bytes(inode, offset + size - found_end); - } - if (found_end < offset) { - ptr = btrfs_file_extent_inline_start(ei) + found_size; - memset_extent_buffer(leaf, 0, ptr, offset - found_end); - } - } else { -insert: - btrfs_release_path(root, path); - datasize = offset + size - key.offset; - inode_add_bytes(inode, datasize); - datasize = btrfs_file_extent_calc_inline_size(datasize); - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - if (ret) { - err = ret; - printk("got bad ret %d\n", ret); - goto fail; - } - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - 
btrfs_set_file_extent_generation(leaf, ei, trans->transid); - btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); - } - ptr = btrfs_file_extent_inline_start(ei) + offset - key.offset; - - cur_size = size; - i = 0; - while (size > 0) { - page = pages[i]; - kaddr = kmap_atomic(page, KM_USER0); - cur_size = min_t(size_t, PAGE_CACHE_SIZE - page_offset, size); - write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); - kunmap_atomic(kaddr, KM_USER0); - page_offset = 0; - ptr += cur_size; - size -= cur_size; - if (i >= num_pages) { - printk("i %d num_pages %d\n", i, num_pages); - } - i++; - } - btrfs_mark_buffer_dirty(leaf); -fail: - btrfs_free_path(path); - return err; -} - /* * after copy_from_user, pages need to be dirtied and we need to make * sure holes are created between the current EOF and the start of @@ -267,8 +120,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; - u64 inline_size; - int did_inline = 0; loff_t isize = i_size_read(inode); start_pos = pos & ~((u64)root->sectorsize - 1); @@ -314,7 +165,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, - 0, 0, hole_size, 0); + 0, 0, hole_size, 0, + hole_size, 0, 0, 0); btrfs_drop_extent_cache(inode, last_pos_in_file, last_pos_in_file + hole_size - 1, 0); mutex_unlock(&BTRFS_I(inode)->extent_mutex); @@ -324,57 +176,19 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, goto failed; } - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent + /* check for reserved extents on each page, we don't want + * to reset the delalloc bit on things that already have + * extents reserved. */ - inline_size = end_pos; - if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > root->fs_info->max_inline || - (inline_size & (root->sectorsize -1)) == 0 || - inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { - /* check for reserved extents on each page, we don't want - * to reset the delalloc bit on things that already have - * extents reserved. 
- */ - btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); - for (i = 0; i < num_pages; i++) { - struct page *p = pages[i]; - SetPageUptodate(p); - ClearPageChecked(p); - set_page_dirty(p); - } - } else { - u64 aligned_end; - /* step one, delete the existing extents in this range */ - aligned_end = (pos + write_bytes + root->sectorsize - 1) & - ~((u64)root->sectorsize - 1); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - err = btrfs_drop_extents(trans, root, inode, start_pos, - aligned_end, aligned_end, &hint_byte); - if (err) - goto failed; - if (isize > inline_size) - inline_size = min_t(u64, isize, aligned_end); - inline_size -= start_pos; - err = insert_inline_extent(trans, root, inode, start_pos, - inline_size, pages, 0, num_pages); - btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1, 0); - BUG_ON(err); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); - - /* - * an ugly way to do all the prop accounting around - * the page bits and mapping tags - */ - set_page_writeback(pages[0]); - end_page_writeback(pages[0]); - did_inline = 1; + btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); + for (i = 0; i < num_pages; i++) { + struct page *p = pages[i]; + SetPageUptodate(p); + ClearPageChecked(p); + set_page_dirty(p); } if (end_pos > isize) { i_size_write(inode, end_pos); - if (did_inline) - BTRFS_I(inode)->disk_i_size = end_pos; btrfs_update_inode(trans, root, inode); } failed: @@ -399,6 +213,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int ret; int testend = 1; unsigned long flags; + int compressed = 0; WARN_ON(end < start); if (end == (u64)-1) { @@ -434,6 +249,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, free_extent_map(em); continue; } + compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); clear_bit(EXTENT_FLAG_PINNED, &em->flags); remove_extent_mapping(em_tree, em); @@ -442,6 +258,12 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->start = em->start; split->len = start - em->start; split->block_start = em->block_start; + + if (compressed) + split->block_len = em->block_len; + else + split->block_len = split->len; + split->bdev = em->bdev; split->flags = flags; ret = add_extent_mapping(em_tree, split); @@ -459,7 +281,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->bdev = em->bdev; split->flags = flags; - split->block_start = em->block_start + diff; + if (compressed) { + split->block_len = em->block_len; + split->block_start = em->block_start; + } else { + split->block_len = split->len; + split->block_start = em->block_start + diff; + } ret = add_extent_mapping(em_tree, split); BUG_ON(ret); @@ -533,7 +361,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) struct btrfs_item *item; item = btrfs_item_nr(leaf, slot); extent_end = found_key.offset + - btrfs_file_extent_inline_len(leaf, item); + btrfs_file_extent_inline_len(leaf, extent); extent_end = (extent_end + root->sectorsize - 1) & ~((u64)root->sectorsize -1 ); } @@ -573,6 +401,10 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 extent_end = 0; u64 search_start = start; u64 leaf_start; + u64 ram_bytes = 0; + u8 compression = 0; + u8 encryption = 0; + u16 other_encoding = 0; u64 root_gen; u64 root_owner; struct extent_buffer *leaf; @@ -589,6 +421,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, int recow; int ret; + inline_limit = 0; btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); @@ 
-637,6 +470,12 @@ next_slot: extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); found_type = btrfs_file_extent_type(leaf, extent); + compression = btrfs_file_extent_compression(leaf, + extent); + encryption = btrfs_file_extent_encryption(leaf, + extent); + other_encoding = btrfs_file_extent_other_encoding(leaf, + extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = btrfs_file_extent_disk_bytenr(leaf, @@ -646,13 +485,13 @@ next_slot: extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, extent); + ram_bytes = btrfs_file_extent_ram_bytes(leaf, + extent); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - struct btrfs_item *item; - item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, item); + btrfs_file_extent_inline_len(leaf, extent); } } else { extent_end = search_start; @@ -680,10 +519,9 @@ next_slot: search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; - if (end <= extent_end && start >= key.offset && found_inline) { + + if (end <= extent_end && start >= key.offset && found_inline) *hint_byte = EXTENT_MAP_INLINE; - goto out; - } if (found_extent) { read_extent_buffer(leaf, &old, (unsigned long)extent, @@ -770,12 +608,27 @@ next_slot: write_extent_buffer(leaf, &old, (unsigned long)extent, sizeof(old)); + btrfs_set_file_extent_compression(leaf, extent, + compression); + btrfs_set_file_extent_encryption(leaf, extent, + encryption); + btrfs_set_file_extent_other_encoding(leaf, extent, + other_encoding); btrfs_set_file_extent_offset(leaf, extent, le64_to_cpu(old.offset) + end - key.offset); WARN_ON(le64_to_cpu(old.num_bytes) < (extent_end - end)); btrfs_set_file_extent_num_bytes(leaf, extent, extent_end - end); + + /* + * set the ram bytes to the size of the full extent + * before splitting. This is a worst case flag, + * but its the best we can do because we don't know + * how splitting affects compression + */ + btrfs_set_file_extent_ram_bytes(leaf, extent, + ram_bytes); btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); -- cgit v1.2.2 From 9036c10208e1fc496cef7692ba66a78699b360dc Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 30 Oct 2008 14:19:41 -0400 Subject: Btrfs: update hole handling v2 This patch splits the hole insertion code out of btrfs_setattr into btrfs_cont_expand and updates btrfs_get_extent to properly handle the case that file extent items are not continuous. 
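The case btrfs_cont_expand has to cover is ordinary sparse file behaviour: a write that begins beyond the current EOF leaves a hole, and every byte inside that hole must read back as zero even though nothing was ever written there. A minimal userspace demonstration, nothing btrfs-specific, with an arbitrary temporary file path:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[16] = { 1 };   /* non-zero so the read is visible */
            int fd = open("/tmp/hole-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);

            if (fd < 0)
                    return 1;
            pwrite(fd, "x", 1, 0);             /* byte 0 */
            pwrite(fd, "y", 1, 1 << 20);       /* byte at 1MiB, hole between */
            pread(fd, buf, sizeof(buf), 4096); /* read from inside the hole */
            printf("hole byte = %d, file size = %lld\n", buf[0],
                   (long long)lseek(fd, 0, SEEK_END));
            close(fd);
            return 0;
    }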
Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 41 ++++++----------------------------------- 1 file changed, 6 insertions(+), 35 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0aa15436590e..b8a7637e14a1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -142,40 +142,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, } set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); - /* FIXME...EIEIO, ENOSPC and more */ - /* insert any holes we need to create */ - if (isize < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->sectorsize - 1; - last_pos_in_file = (isize + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - if (hole_size > 0) { - btrfs_wait_ordered_range(inode, last_pos_in_file, - last_pos_in_file + hole_size); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - err = btrfs_drop_extents(trans, root, inode, - last_pos_in_file, - last_pos_in_file + hole_size, - last_pos_in_file, - &hint_byte); - if (err) - goto failed; - - err = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size, 0, - hole_size, 0, 0, 0); - btrfs_drop_extent_cache(inode, last_pos_in_file, - last_pos_in_file + hole_size - 1, 0); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); - btrfs_check_file(root, inode); - } - if (err) - goto failed; - } - /* check for reserved extents on each page, we don't want * to reset the delalloc bit on things that already have * extents reserved. @@ -191,7 +157,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, i_size_write(inode, end_pos); btrfs_update_inode(trans, root, inode); } -failed: err = btrfs_end_transaction(trans, root); out_unlock: unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); @@ -697,6 +662,12 @@ static int noinline prepare_pages(struct btrfs_root *root, struct file *file, start_pos = pos & ~((u64)root->sectorsize - 1); last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; + if (start_pos > inode->i_size) { + err = btrfs_cont_expand(inode, start_pos); + if (err) + return err; + } + memset(pages, 0, num_pages * sizeof(struct page *)); again: for (i = 0; i < num_pages; i++) { -- cgit v1.2.2 From 6643558db29006825dbb10012b3f8890aca4bcd5 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 30 Oct 2008 14:19:50 -0400 Subject: Btrfs: Fix bookend extent race v2 When dropping middle part of an extent, btrfs_drop_extents truncates the extent at first, then inserts a bookend extent. Since truncation and insertion can't be done atomically, there is a small period that the bookend extent isn't in the tree. This causes problem for functions that search the tree for file extent item. The way to fix this is lock the range of the bookend extent before truncation. 
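The locking added here follows a try-lock and back-off shape: attempt a non-blocking lock on the bookend range while the btree path is still held, and if that fails, drop the path, take the lock the blocking way, remember the widened locked range, and redo the search. A generic pthread sketch of that ordering discipline, using placeholder helpers rather than the extent io_tree calls. Build with -lpthread:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;

    /* placeholder stand-ins for dropping and redoing a btree search */
    static void release_path(void)  { puts("released path"); }
    static void search_again(void)  { puts("searched again"); }

    static void lock_range_safely(void)
    {
            /* fast path: non-blocking attempt while the path is held */
            if (pthread_mutex_trylock(&range_lock) == 0)
                    return;
            /* slow path: drop what we hold so blocking cannot deadlock,
             * take the lock, then repeat the lookup under the lock because
             * the tree may have changed while we waited */
            release_path();
            pthread_mutex_lock(&range_lock);
            search_again();
    }

    int main(void)
    {
            lock_range_safely();
            pthread_mutex_unlock(&range_lock);
            return 0;
    }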
Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b8a7637e14a1..1a0510ad030c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -364,6 +364,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { u64 extent_end = 0; + u64 locked_end = end; u64 search_start = start; u64 leaf_start; u64 ram_bytes = 0; @@ -479,12 +480,6 @@ next_slot: goto next_slot; } - if (found_inline) { - u64 mask = root->sectorsize - 1; - search_start = (extent_end + mask) & ~mask; - } else - search_start = extent_end; - if (end <= extent_end && start >= key.offset && found_inline) *hint_byte = EXTENT_MAP_INLINE; @@ -501,6 +496,26 @@ next_slot: if (found_inline && start <= key.offset) keep = 1; } + + if (bookend && found_extent && locked_end < extent_end) { + ret = try_lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + if (!ret) { + btrfs_release_path(root, path); + lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + locked_end = extent_end; + continue; + } + locked_end = extent_end; + } + + if (found_inline) { + u64 mask = root->sectorsize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; + /* truncate existing extent */ if (start > key.offset) { u64 new_num; @@ -638,6 +653,10 @@ next_slot: } out: btrfs_free_path(path); + if (locked_end > end) { + unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, + GFP_NOFS); + } btrfs_check_file(root, inode); return ret; } -- cgit v1.2.2 From d899e05215178fed903ad0e7fc1cb4d8e0cc0a88 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 30 Oct 2008 14:25:28 -0400 Subject: Btrfs: Add fallocate support v2 This patch updates btrfs-progs for fallocate support. fallocate is a little different in Btrfs because we need to tell the COW system that a given preallocated extent doesn't need to be cow'd as long as there are no snapshots of it. This leverages the -o nodatacow checks. 
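From user space the feature is reached through fallocate(2): the preallocated range occupies space and reads back as zeros, and a later write lets btrfs_mark_extent_written (added in the diff below) convert the extent from preallocated to regular instead of allocating fresh space. A minimal Linux-only caller with error handling trimmed and a throwaway file path:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/tmp/falloc-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);

            if (fd < 0)
                    return 1;
            /* reserve 1MiB without writing any data */
            if (fallocate(fd, 0, 0, 1 << 20) != 0) {
                    perror("fallocate");
                    return 1;
            }
            /* writing into the reserved range reuses the preallocated
             * extent rather than allocating new space */
            pwrite(fd, "data", 4, 4096);
            printf("size after fallocate: %lld\n",
                   (long long)lseek(fd, 0, SEEK_END));
            close(fd);
            return 0;
    }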
Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 245 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 241 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1a0510ad030c..238a8e215eb9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -381,7 +381,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, int keep; int slot; int bookend; - int found_type; + int found_type = 0; int found_extent; int found_inline; int recow; @@ -442,7 +442,8 @@ next_slot: extent); other_encoding = btrfs_file_extent_other_encoding(leaf, extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { + if (found_type == BTRFS_FILE_EXTENT_REG || + found_type == BTRFS_FILE_EXTENT_PREALLOC) { extent_end = btrfs_file_extent_disk_bytenr(leaf, extent); @@ -609,8 +610,7 @@ next_slot: */ btrfs_set_file_extent_ram_bytes(leaf, extent, ram_bytes); - btrfs_set_file_extent_type(leaf, extent, - BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_type(leaf, extent, found_type); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -661,6 +661,243 @@ out: return ret; } +static int extent_mergeable(struct extent_buffer *leaf, int slot, + u64 objectid, u64 bytenr, u64 *start, u64 *end) +{ + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 extent_end; + + if (slot < 0 || slot >= btrfs_header_nritems(leaf)) + return 0; + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) + return 0; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || + btrfs_file_extent_compression(leaf, fi) || + btrfs_file_extent_encryption(leaf, fi) || + btrfs_file_extent_other_encoding(leaf, fi)) + return 0; + + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); + if ((*start && *start != key.offset) || (*end && *end != extent_end)) + return 0; + + *start = key.offset; + *end = extent_end; + return 1; +} + +/* + * Mark extent in the range start - end as written. + * + * This changes extent type from 'pre-allocated' to 'regular'. If only + * part of extent is marked as written, the extent will be split into + * two or three. 
+ */ +int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, u64 start, u64 end) +{ + struct extent_buffer *leaf; + struct btrfs_path *path; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr; + u64 num_bytes; + u64 extent_end; + u64 extent_offset; + u64 other_start; + u64 other_end; + u64 split = start; + u64 locked_end = end; + int extent_type; + int split_end = 1; + int ret; + + btrfs_drop_extent_cache(inode, start, end - 1, 0); + + path = btrfs_alloc_path(); + BUG_ON(!path); +again: + key.objectid = inode->i_ino; + key.type = BTRFS_EXTENT_DATA_KEY; + if (split == start) + key.offset = split; + else + key.offset = split - 1; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0 && path->slots[0] > 0) + path->slots[0]--; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + BUG_ON(key.objectid != inode->i_ino || + key.type != BTRFS_EXTENT_DATA_KEY); + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + extent_type = btrfs_file_extent_type(leaf, fi); + BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC); + extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); + BUG_ON(key.offset > start || extent_end < end); + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + extent_offset = btrfs_file_extent_offset(leaf, fi); + + if (key.offset == start) + split = end; + + if (key.offset == start && extent_end == end) { + int del_nr = 0; + int del_slot = 0; + u64 leaf_owner = btrfs_header_owner(leaf); + u64 leaf_gen = btrfs_header_generation(leaf); + other_start = end; + other_end = 0; + if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, + bytenr, &other_start, &other_end)) { + extent_end = other_end; + del_slot = path->slots[0] + 1; + del_nr++; + ret = btrfs_free_extent(trans, root, bytenr, num_bytes, + leaf->start, leaf_owner, + leaf_gen, inode->i_ino, 0); + BUG_ON(ret); + } + other_start = 0; + other_end = start; + if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, + bytenr, &other_start, &other_end)) { + key.offset = other_start; + del_slot = path->slots[0]; + del_nr++; + ret = btrfs_free_extent(trans, root, bytenr, num_bytes, + leaf->start, leaf_owner, + leaf_gen, inode->i_ino, 0); + BUG_ON(ret); + } + split_end = 0; + if (del_nr == 0) { + btrfs_set_file_extent_type(leaf, fi, + BTRFS_FILE_EXTENT_REG); + goto done; + } + + fi = btrfs_item_ptr(leaf, del_slot - 1, + struct btrfs_file_extent_item); + btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_end - key.offset); + btrfs_mark_buffer_dirty(leaf); + + ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + BUG_ON(ret); + goto done; + } else if (split == start) { + if (locked_end < extent_end) { + ret = try_lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + if (!ret) { + btrfs_release_path(root, path); + lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, GFP_NOFS); + locked_end = extent_end; + goto again; + } + locked_end = extent_end; + } + btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); + extent_offset += split - key.offset; + } else { + BUG_ON(key.offset != start); + btrfs_set_file_extent_offset(leaf, fi, extent_offset + + split - key.offset); + btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split); + key.offset = split; + btrfs_set_item_key_safe(trans, root, path, 
&key); + extent_end = split; + } + + if (extent_end == end) { + split_end = 0; + extent_type = BTRFS_FILE_EXTENT_REG; + } + if (extent_end == end && split == start) { + other_start = end; + other_end = 0; + if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, + bytenr, &other_start, &other_end)) { + path->slots[0]++; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + key.offset = split; + btrfs_set_item_key_safe(trans, root, path, &key); + btrfs_set_file_extent_offset(leaf, fi, extent_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, + other_end - split); + goto done; + } + } + if (extent_end == end && split == end) { + other_start = 0; + other_end = start; + if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino, + bytenr, &other_start, &other_end)) { + path->slots[0]--; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - + other_start); + goto done; + } + } + + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); + + key.offset = start; + ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi)); + BUG_ON(ret); + + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, fi, trans->transid); + btrfs_set_file_extent_type(leaf, fi, extent_type); + btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); + btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_offset(leaf, fi, extent_offset); + btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); + btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); + btrfs_set_file_extent_compression(leaf, fi, 0); + btrfs_set_file_extent_encryption(leaf, fi, 0); + btrfs_set_file_extent_other_encoding(leaf, fi, 0); + + ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, + leaf->start, root->root_key.objectid, + trans->transid, inode->i_ino); + BUG_ON(ret); +done: + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(root, path); + if (split_end && split == start) { + split = end; + goto again; + } + if (locked_end > end) { + unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, + GFP_NOFS); + } + btrfs_free_path(path); + return 0; +} + /* * this gets pages into the page cache and locks them down, it also properly * waits for data=ordered extents to finish before allowing the pages to be -- cgit v1.2.2 From 70b99e6959a4c28ae1b314985eca731f3db72f1d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 31 Oct 2008 12:46:39 -0400 Subject: Btrfs: Compression corner fixes Make sure we keep page->mapping NULL on the pages we're getting via alloc_page. It gets set so a few of the callbacks can do the right thing, but in general these pages don't have a mapping. Don't try to truncate compressed inline items in btrfs_drop_extents. The whole compressed item must be preserved. Don't try to create multipage inline compressed items. When we try to overwrite just the first page of the file, we would have to read in and recow all the pages after it in the same compressed inline items. For now, only create single page inline items. Make sure we lock pages in the correct order during delalloc. The search into the state tree for delalloc bytes can return bytes before the page we already have locked. 
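The last point is the standard "lock pages in ascending index order" rule: when the delalloc search hands back a range that starts before the page already locked, the start must be clamped forward rather than locked out of order. A schematic sketch with hypothetical helpers, not the extent_io code:

    #include <stdio.h>

    /* hypothetical stand-in for locking a single page by index */
    static void lock_index(unsigned long idx)
    {
            printf("lock page %lu\n", idx);
    }

    /* Lock the delalloc range [start, end] while the page at 'held' is
     * already locked: never reach backwards behind the held page, since
     * that would invert the locking order and can deadlock. */
    static void lock_delalloc_range(unsigned long held,
                                    unsigned long start, unsigned long end)
    {
            if (start <= held)
                    start = held + 1;
            for (; start <= end; start++)
                    lock_index(start);
    }

    int main(void)
    {
            /* request 8..12 while already holding page 10: only 11 and 12
             * are locked here, 10 stays with its current owner */
            lock_delalloc_range(10, 8, 12);
            return 0;
    }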
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 238a8e215eb9..0c8cc35a8b97 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -368,8 +368,8 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 search_start = start; u64 leaf_start; u64 ram_bytes = 0; - u8 compression = 0; - u8 encryption = 0; + u8 compression; + u8 encryption; u16 other_encoding = 0; u64 root_gen; u64 root_owner; @@ -415,6 +415,8 @@ next_slot: leaf_start = 0; root_gen = 0; root_owner = 0; + compression = 0; + encryption = 0; extent = NULL; leaf = path->nodes[0]; slot = path->slots[0]; @@ -546,8 +548,12 @@ next_slot: inline_limit - key.offset); inode_sub_bytes(inode, extent_end - inline_limit); - btrfs_truncate_item(trans, root, path, - new_size, 1); + btrfs_set_file_extent_ram_bytes(leaf, extent, + new_size); + if (!compression && !encryption) { + btrfs_truncate_item(trans, root, path, + new_size, 1); + } } } /* delete the entire extent */ @@ -567,8 +573,11 @@ next_slot: new_size = btrfs_file_extent_calc_inline_size( extent_end - end); inode_sub_bytes(inode, end - key.offset); - ret = btrfs_truncate_item(trans, root, path, - new_size, 0); + btrfs_set_file_extent_ram_bytes(leaf, extent, + new_size); + if (!compression && !encryption) + ret = btrfs_truncate_item(trans, root, path, + new_size, 0); BUG_ON(ret); } /* create bookend, splitting the extent in two */ -- cgit v1.2.2 From 771ed689d2cd53439e28e095bc38fbe40a71429e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 6 Nov 2008 22:02:51 -0500 Subject: Btrfs: Optimize compressed writeback and reads When reading compressed extents, try to put pages into the page cache for any pages covered by the compressed extent that readpages didn't already preload. Add an async work queue to handle transformations at delayed allocation processing time. Right now this is just compression. The workflow is: 1) Find offsets in the file marked for delayed allocation 2) Lock the pages 3) Lock the state bits 4) Call the async delalloc code The async delalloc code clears the state lock bits and delalloc bits. It is important this happens before the range goes into the work queue because otherwise it might deadlock with other work queue items that try to lock those extent bits. The file pages are compressed, and if the compression doesn't work the pages are written back directly. An ordered work queue is used to make sure the inodes are written in the same order that pdflush or writepages sent them down. This changes extent_write_cache_pages to let the writepage function update the wbc nr_written count. 
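The ordered work queue behaviour described above, where compression work may finish on any CPU in any order but writeback must follow submission order, can be modelled in a few lines. This is a single-threaded simulation of the policy only, not the btrfs async-thread code:

    #include <stdbool.h>
    #include <stdio.h>

    #define NR 5

    int main(void)
    {
            bool finished[NR] = { false };
            int finish_order[NR] = { 2, 0, 1, 4, 3 };  /* pretend completion order */
            int done_upto = 0;

            for (int i = 0; i < NR; i++) {
                    finished[finish_order[i]] = true;
                    /* run completion hooks strictly in submission order,
                     * only while the head of the queue has finished */
                    while (done_upto < NR && finished[done_upto]) {
                            printf("write out work item %d\n", done_upto);
                            done_upto++;
                    }
            }
            return 0;
    }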
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 53 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0c8cc35a8b97..337221ecca27 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -368,6 +368,8 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, u64 search_start = start; u64 leaf_start; u64 ram_bytes = 0; + u64 orig_parent = 0; + u64 disk_bytenr = 0; u8 compression; u8 encryption; u16 other_encoding = 0; @@ -500,17 +502,31 @@ next_slot: keep = 1; } - if (bookend && found_extent && locked_end < extent_end) { - ret = try_lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, GFP_NOFS); - if (!ret) { - btrfs_release_path(root, path); - lock_extent(&BTRFS_I(inode)->io_tree, - locked_end, extent_end - 1, GFP_NOFS); + if (bookend && found_extent) { + if (locked_end < extent_end) { + ret = try_lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, + GFP_NOFS); + if (!ret) { + btrfs_release_path(root, path); + lock_extent(&BTRFS_I(inode)->io_tree, + locked_end, extent_end - 1, + GFP_NOFS); + locked_end = extent_end; + continue; + } locked_end = extent_end; - continue; } - locked_end = extent_end; + orig_parent = path->nodes[0]->start; + disk_bytenr = le64_to_cpu(old.disk_bytenr); + if (disk_bytenr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_bytenr, + le64_to_cpu(old.disk_num_bytes), + orig_parent, root->root_key.objectid, + trans->transid, inode->i_ino); + BUG_ON(ret); + } } if (found_inline) { @@ -537,8 +553,12 @@ next_slot: inode_sub_bytes(inode, old_num - new_num); } - btrfs_set_file_extent_num_bytes(leaf, extent, - new_num); + if (!compression && !encryption) { + btrfs_set_file_extent_ram_bytes(leaf, + extent, new_num); + } + btrfs_set_file_extent_num_bytes(leaf, + extent, new_num); btrfs_mark_buffer_dirty(leaf); } else if (key.offset < inline_limit && (end > extent_end) && @@ -582,11 +602,11 @@ next_slot: } /* create bookend, splitting the extent in two */ if (bookend && found_extent) { - u64 disk_bytenr; struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); @@ -623,14 +643,13 @@ next_slot: btrfs_mark_buffer_dirty(path->nodes[0]); - disk_bytenr = le64_to_cpu(old.disk_bytenr); if (disk_bytenr != 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_bytenr, - le64_to_cpu(old.disk_num_bytes), - leaf->start, + ret = btrfs_update_extent_ref(trans, root, + disk_bytenr, orig_parent, + leaf->start, root->root_key.objectid, trans->transid, ins.objectid); + BUG_ON(ret); } btrfs_release_path(root, path); -- cgit v1.2.2 From ff5b7ee33d82414bf4baf299c21fb703bcc89629 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 10 Nov 2008 07:34:43 -0500 Subject: Btrfs: Fix csum error for compressed data The decompress code doesn't take the logical offset in extent pointer into account. If the logical offset isn't zero, data will be decompressed into wrong pages. The solution used here is to record the starting offset of the extent in the file separately from the logical start of the extent_map struct. This allows us to avoid problems inserting overlapping extents. 
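The distinction being recorded is between the logical start of a (possibly split) in-memory mapping and the file offset at which the on-disk extent begins; a compressed extent is decompressed as one unit, so offsets into the decompressed data must be measured from the latter. A cut-down structure to show the arithmetic; the field names follow the patch, everything else is simplified and hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    struct toy_extent_map {
            uint64_t start;        /* logical start of this split mapping */
            uint64_t orig_start;   /* file offset where the extent begins */
            uint64_t block_start;  /* compressed data location on disk */
    };

    /* Offset into the decompressed extent data for a file position: it is
     * relative to orig_start, not to the split mapping's start. */
    static uint64_t offset_into_extent(const struct toy_extent_map *em,
                                       uint64_t file_pos)
    {
            return file_pos - em->orig_start;
    }

    int main(void)
    {
            /* a mapping split at 8K out of an extent that starts at 0 */
            struct toy_extent_map em = {
                    .start = 8192, .orig_start = 0, .block_start = 1 << 20,
            };

            printf("offset for file pos 8192 = %llu\n",
                   (unsigned long long)offset_into_extent(&em, 8192));
            return 0;
    }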
Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 337221ecca27..85841c538805 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -222,6 +222,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, em->start < start) { split->start = em->start; split->len = start - em->start; + split->orig_start = em->orig_start; split->block_start = em->block_start; if (compressed) @@ -243,6 +244,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->start = start + len; split->len = em->start + em->len - (start + len); + split->orig_start = em->orig_start; split->bdev = em->bdev; split->flags = flags; -- cgit v1.2.2 From 445a69449994a37615cd47e47bcab2e42a070adf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Nov 2008 11:53:33 -0500 Subject: Btrfs: Fix usage of struct extent_map->orig_start This makes sure the orig_start field in struct extent_map gets set everywhere the extent_map structs are created or modified. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 85841c538805..4119f9a95320 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -244,16 +244,17 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split->start = start + len; split->len = em->start + em->len - (start + len); - split->orig_start = em->orig_start; split->bdev = em->bdev; split->flags = flags; if (compressed) { split->block_len = em->block_len; split->block_start = em->block_start; + split->orig_start = em->orig_start; } else { split->block_len = split->len; split->block_start = em->block_start + diff; + split->orig_start = split->start; } ret = add_extent_mapping(em_tree, split); -- cgit v1.2.2 From 8247b41ac980d125de8aeba6f33f381056ac0ecb Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 11 Nov 2008 09:33:29 -0500 Subject: Btrfs: Fix starting search offset inside btrfs_drop_extents btrfs_drop_extents will drop paths and search again when it needs to force COW of higher nodes. It was using the key it found during the last search as the offset for the next search. But, this wasn't always correct. The key could be from before our desired range, and because we're dropping the path, it is possible for file's items to change while we do the search again. The fix here is to make sure we don't search for something smaller than the offset btrfs_drop_extents was called with. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4119f9a95320..934bc094bf17 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -436,7 +436,7 @@ next_slot: goto out; } if (recow) { - search_start = key.offset; + search_start = max(key.offset, start); continue; } if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { -- cgit v1.2.2 From c36047d729a3fa080dd194b20b684cc9fe73e90c Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 12 Nov 2008 14:19:50 -0500 Subject: Btrfs: Fix race in btrfs_mark_extent_written When extent needs to be split, btrfs_mark_extent_written truncates the extent first, then inserts a new extent and increases the reference count. The race happens if someone else deletes the old extent before the new extent is inserted. The fix here is increase the reference count in advance. 
This race is similar to the race in btrfs_drop_extents that was recently fixed. Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 934bc094bf17..1e8c024c69c3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -746,6 +746,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, u64 other_end; u64 split = start; u64 locked_end = end; + u64 orig_parent; int extent_type; int split_end = 1; int ret; @@ -890,6 +891,12 @@ again: } btrfs_mark_buffer_dirty(leaf); + + orig_parent = leaf->start; + ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, + orig_parent, root->root_key.objectid, + trans->transid, inode->i_ino); + BUG_ON(ret); btrfs_release_path(root, path); key.offset = start; @@ -910,10 +917,13 @@ again: btrfs_set_file_extent_encryption(leaf, fi, 0); btrfs_set_file_extent_other_encoding(leaf, fi, 0); - ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, - leaf->start, root->root_key.objectid, - trans->transid, inode->i_ino); - BUG_ON(ret); + if (orig_parent != leaf->start) { + ret = btrfs_update_extent_ref(trans, root, bytenr, + orig_parent, leaf->start, + root->root_key.objectid, + trans->transid, inode->i_ino); + BUG_ON(ret); + } done: btrfs_mark_buffer_dirty(leaf); btrfs_release_path(root, path); -- cgit v1.2.2 From 6e430f94e508fee1aefd1dfec88da3c24ce64433 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Dec 2008 06:36:09 -0500 Subject: Btrfs: fix shadowed variable declarations Signed-off-by: Chris Mason --- fs/btrfs/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1e8c024c69c3..1c9243560eab 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -662,18 +662,18 @@ next_slot: } if (found_extent && !keep) { - u64 disk_bytenr = le64_to_cpu(old.disk_bytenr); + u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr); - if (disk_bytenr != 0) { + if (old_disk_bytenr != 0) { inode_sub_bytes(inode, le64_to_cpu(old.num_bytes)); ret = btrfs_free_extent(trans, root, - disk_bytenr, + old_disk_bytenr, le64_to_cpu(old.disk_num_bytes), leaf_start, root_owner, root_gen, key.objectid, 0); BUG_ON(ret); - *hint_byte = disk_bytenr; + *hint_byte = old_disk_bytenr; } } -- cgit v1.2.2 From c3027eb5523d6983f12628f3fe13d8a7576db701 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 8 Dec 2008 16:40:21 -0500 Subject: Btrfs: Add inode sequence number for NFS and reserved space in a few structs This adds a sequence number to the btrfs inode that is increased on every update. NFS will be able to use that to detect when an inode has changed, without relying on inaccurate time fields. While we're here, this also: Puts reserved space into the super block and inode Adds a log root transid to the super so we can pick the newest super based on the fsync log as well as the main transaction ID. For now the log root transid is always zero, but that'll get fixed. Adds a starting offset to the dev_item. This will let us do better alignment calculations if we know the start of a partition on the disk. 
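The sequence field gives NFS a change attribute that does not depend on timestamp resolution: the filesystem bumps it on every update (the diff below adds the increment to btrfs_file_write) and a client only has to compare the value it cached against the current one. A toy model of that contract, with a made-up struct in place of the real btrfs inode:

    #include <stdint.h>
    #include <stdio.h>

    /* made-up stand-in for the per-inode counter */
    struct toy_inode {
            uint64_t sequence;
    };

    static void inode_write(struct toy_inode *inode)
    {
            inode->sequence++;   /* analogous to BTRFS_I(inode)->sequence++ */
    }

    int main(void)
    {
            struct toy_inode inode = { .sequence = 0 };
            uint64_t cached = inode.sequence;   /* what the client last saw */

            inode_write(&inode);
            printf("inode changed: %s\n",
                   inode.sequence != cached ? "yes" : "no");
            return 0;
    }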
Signed-off-by: Chris Mason --- fs/btrfs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1c9243560eab..b5a6a2b6f668 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1055,6 +1055,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->sequence++; first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; -- cgit v1.2.2 From 580afd76e451deb6772d0507de580fb1df14da6c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 8 Dec 2008 19:15:39 -0500 Subject: Btrfs: Fix compressed checksum fsync log copies The fsync logging code makes sure to onl copy the relevant checksum for each extent based on the file extent pointers it finds. But for compressed extents, it needs to copy the checksum for the entire extent. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b5a6a2b6f668..71bfe3a6a444 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1228,7 +1228,8 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) mutex_unlock(&root->fs_info->trans_mutex); root->fs_info->tree_log_batch++; - filemap_fdatawait(inode->i_mapping); + filemap_fdatawrite(inode->i_mapping); + btrfs_wait_ordered_range(inode, 0, (u64)-1); root->fs_info->tree_log_batch++; /* -- cgit v1.2.2 From 17d217fe970d34720f4f1633dca73a6aa2f3d9d1 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 12 Dec 2008 10:03:38 -0500 Subject: Btrfs: fix nodatasum handling in balancing code Checksums on data can be disabled by mount option, so it's possible some data extents don't have checksums or have invalid checksums. This causes trouble for data relocation. This patch contains following things to make data relocation work. 1) make nodatasum/nodatacow mount option only affects new files. Checksums and COW on data are only controlled by the inode flags. 2) check the existence of checksum in the nodatacow checker. If checksums exist, force COW the data extent. This ensure that checksum for a given block is either valid or does not exist. 3) update data relocation code to properly handle the case of checksum missing. Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71bfe3a6a444..507081059d97 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1059,14 +1059,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; - /* - * if this is a nodatasum mount, force summing off for the inode - * all the time. 
That way a later mount with summing on won't - * get confused - */ - if (btrfs_test_opt(root, NODATASUM)) - btrfs_set_flag(inode, NODATASUM); - /* * there are lots of better ways to do this, but this code * makes sure the first and last page in the file range are -- cgit v1.2.2 From 9aead43588f4bdb1bb61e348ad0f33794bbddc0f Mon Sep 17 00:00:00 2001 From: yanhai zhu Date: Mon, 5 Jan 2009 15:49:11 -0500 Subject: Btrfs: Fix memset length in btrfs_file_write Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 507081059d97..5908521922fb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1094,7 +1094,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, PAGE_CACHE_SHIFT; WARN_ON(num_pages > nrptrs); - memset(pages, 0, sizeof(pages)); + memset(pages, 0, sizeof(struct page *) * nrptrs); ret = btrfs_check_free_space(root, write_bytes, 0); if (ret) -- cgit v1.2.2 From d397712bcc6a759a560fd247e6053ecae091f958 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 5 Jan 2009 21:25:51 -0500 Subject: Btrfs: Fix checkpatch.pl warnings There were many, most are fixed now. struct-funcs.c generates some warnings but these are bogus. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5908521922fb..0e3a13a45653 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -44,10 +44,10 @@ /* simple helper to fault in pages and copy. This should go away * and be replaced with calls into generic code. */ -static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, +static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, - const char __user * buf) + const char __user *buf) { long page_fault = 0; int i; @@ -78,7 +78,7 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, /* * unlocks pages after btrfs_file_write is done with them */ -static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) +static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { @@ -103,7 +103,7 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) * this also makes the decision about creating an inline extent vs * doing real data extents, marking pages dirty and delalloc as required. 
*/ -static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, +static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, struct page **pages, @@ -137,9 +137,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_trans_block_group(trans, inode); hint_byte = 0; - if ((end_of_last_block & 4095) == 0) { - printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); - } set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* check for reserved extents on each page, we don't want @@ -185,7 +182,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, len = (u64)-1; testend = 0; } - while(1) { + while (1) { if (!split) split = alloc_extent_map(GFP_NOFS); if (!split2) @@ -295,7 +292,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) path = btrfs_alloc_path(); ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, last_offset, 0); - while(1) { + while (1) { nritems = btrfs_header_nritems(path->nodes[0]); if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(root, path); @@ -314,8 +311,10 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (found_key.offset < last_offset) { WARN_ON(1); btrfs_print_leaf(root, leaf); - printk("inode %lu found offset %Lu expected %Lu\n", - inode->i_ino, found_key.offset, last_offset); + printk(KERN_ERR "inode %lu found offset %llu " + "expected %llu\n", inode->i_ino, + (unsigned long long)found_key.offset, + (unsigned long long)last_offset); err = 1; goto out; } @@ -331,7 +330,7 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) extent_end = found_key.offset + btrfs_file_extent_inline_len(leaf, extent); extent_end = (extent_end + root->sectorsize - 1) & - ~((u64)root->sectorsize -1 ); + ~((u64)root->sectorsize - 1); } last_offset = extent_end; path->slots[0]++; @@ -339,8 +338,9 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode) if (0 && last_offset < inode->i_size) { WARN_ON(1); btrfs_print_leaf(root, leaf); - printk("inode %lu found offset %Lu size %Lu\n", inode->i_ino, - last_offset, inode->i_size); + printk(KERN_ERR "inode %lu found offset %llu size %llu\n", + inode->i_ino, (unsigned long long)last_offset, + (unsigned long long)inode->i_size); err = 1; } @@ -362,7 +362,7 @@ out: * inline_limit is used to tell this code which offsets in the file to keep * if they contain inline extents. */ -int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, +noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_byte) { @@ -398,7 +398,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - while(1) { + while (1) { recow = 0; btrfs_release_path(root, path); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, @@ -649,16 +649,15 @@ next_slot: if (disk_bytenr != 0) { ret = btrfs_update_extent_ref(trans, root, disk_bytenr, orig_parent, - leaf->start, + leaf->start, root->root_key.objectid, trans->transid, ins.objectid); BUG_ON(ret); } btrfs_release_path(root, path); - if (disk_bytenr != 0) { + if (disk_bytenr != 0) inode_add_bytes(inode, extent_end - end); - } } if (found_extent && !keep) { @@ -944,7 +943,7 @@ done: * waits for data=ordered extents to finish before allowing the pages to be * modified. 
*/ -static int noinline prepare_pages(struct btrfs_root *root, struct file *file, +static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, unsigned long last_index, size_t write_bytes) @@ -979,7 +978,8 @@ again: struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1); + ordered = btrfs_lookup_first_ordered_extent(inode, + last_pos - 1); if (ordered && ordered->file_offset + ordered->len > start_pos && ordered->file_offset < last_pos) { @@ -1085,7 +1085,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - while(count > 0) { + while (count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(count, nrptrs * (size_t)PAGE_CACHE_SIZE - @@ -1178,7 +1178,7 @@ out_nolock: return num_written ? num_written : err; } -int btrfs_release_file(struct inode * inode, struct file * filp) +int btrfs_release_file(struct inode *inode, struct file *filp) { if (filp->private_data) btrfs_ioctl_trans_end(filp); @@ -1237,9 +1237,8 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret < 0) { + if (ret < 0) goto out; - } /* we've logged all the items and now have a consistent * version of the file in the log. It is possible that -- cgit v1.2.2 From 1ba12553f3600ffebad226c5204ab0e46df98161 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Tue, 6 Jan 2009 09:58:02 -0500 Subject: Btrfs: don't change file extent's ram_bytes in btrfs_drop_extents btrfs_drop_extents doesn't change file extent's ram_bytes in the case of booked extent. To be consistent, we should also not change ram_bytes when truncating existing extent. Signed-off-by: Yan Zheng --- fs/btrfs/file.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/file.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0e3a13a45653..90268334145e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -556,10 +556,6 @@ next_slot: inode_sub_bytes(inode, old_num - new_num); } - if (!compression && !encryption) { - btrfs_set_file_extent_ram_bytes(leaf, - extent, new_num); - } btrfs_set_file_extent_num_bytes(leaf, extent, new_num); btrfs_mark_buffer_dirty(leaf); -- cgit v1.2.2