Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: break out of shrink_delalloc earlier
  btrfs: fix not enough reserved space
  btrfs: fix dip leak
  Btrfs: make sure not to return overlapping extents to fiemap
  Btrfs: deal with short returns from copy_from_user
  Btrfs: fix regressions in copy_from_user handling
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-03-13 19:00:49 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-03-13 19:00:49 -0400
commit: 0e5b88cd9975dca6c191cc9bd11f233fac4ca882 (patch)
tree: 6302317acfb24d6b5ad7924aceaa0dd1c4a7a371 /fs
parent: eebea5d13d391981061d6ef069a841002eba4a7a (diff)
parent: 36e39c40b3facc9b489a13f1d301fc53ff6960a3 (diff)
5 files changed, 135 insertions, 62 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f820fa23df4..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
        u64 disk_total;         /* total bytes on disk, takes mirrors into
                                   account */
+        /*
+         * we bump reservation progress every time we decrement
+         * bytes_reserved.  This way people waiting for reservations
+         * know something good has happened and they can check
+         * for progress.  The number here isn't to be trusted, it
+         * just shows reclaim activity
+         */
+        unsigned long reservation_progress;
        int full;               /* indicates that we cannot allocate any more
                                   chunks for this space */
        int force_alloc;        /* set if we need to force a chunk alloc for
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 588ff9849873..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        u64 max_reclaim;
        u64 reclaimed = 0;
        long time_left;
-        int pause = 1;
        int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
        int loops = 0;
+        unsigned long progress;
        block_rsv = &root->fs_info->delalloc_block_rsv;
        space_info = block_rsv->space_info;
        smp_mb();
        reserved = space_info->bytes_reserved;
+        progress = space_info->reservation_progress;
        if (reserved == 0)
                return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
                spin_lock(&space_info->lock);
-                if (reserved > space_info->bytes_reserved) {
+                if (reserved > space_info->bytes_reserved)
-                        loops = 0;
                        reclaimed += reserved - space_info->bytes_reserved;
-                } else {
-                        loops++;
-                }
                reserved = space_info->bytes_reserved;
                spin_unlock(&space_info->lock);
+                loops++;
                if (reserved == 0 || reclaimed >= max_reclaim)
                        break;
                if (trans && trans->transaction->blocked)
                        return -EAGAIN;
-                __set_current_state(TASK_INTERRUPTIBLE);
+                time_left = schedule_timeout_interruptible(1);
-                time_left = schedule_timeout(pause);
                /* We were interrupted, exit */
                if (time_left)
                        break;
-                pause <<= 1;
+                /* we've kicked the IO a few times, if anything has been freed,
-                if (pause > HZ / 10)
+                 * exit.  There is no sense in looping here for a long time
-                        pause = HZ / 10;
+                 * when we really need to commit the transaction, or there are
+                 * just too many writers without enough free space
+                 */
+                if (loops > 3) {
+                        smp_mb();
+                        if (progress != space_info->reservation_progress)
+                                break;
+                }
        }
        return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
                if (num_bytes) {
                        spin_lock(&space_info->lock);
                        space_info->bytes_reserved -= num_bytes;
+                        space_info->reservation_progress++;
                        spin_unlock(&space_info->lock);
                }
        }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
        if (block_rsv->reserved >= block_rsv->size) {
                num_bytes = block_rsv->reserved - block_rsv->size;
                sinfo->bytes_reserved -= num_bytes;
+                sinfo->reservation_progress++;
                block_rsv->reserved = block_rsv->size;
                block_rsv->full = 1;
        }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
                to_reserve = 0;
        }
        spin_unlock(&BTRFS_I(inode)->accounting_lock);
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
        if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        btrfs_set_block_group_used(&cache->item, old_val);
                        cache->reserved -= num_bytes;
                        cache->space_info->bytes_reserved -= num_bytes;
+                        cache->space_info->reservation_progress++;
                        cache->space_info->bytes_used += num_bytes;
                        cache->space_info->disk_used += num_bytes * factor;
                        spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
        if (reserved) {
                cache->reserved -= num_bytes;
                cache->space_info->bytes_reserved -= num_bytes;
+                cache->space_info->reservation_progress++;
        }
        spin_unlock(&cache->lock);
        spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
                                space_info->bytes_readonly += num_bytes;
                        cache->reserved -= num_bytes;
                        space_info->bytes_reserved -= num_bytes;
+                        space_info->reservation_progress++;
                }
                spin_unlock(&cache->lock);
                spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                if (ret) {
                        spin_lock(&cache->space_info->lock);
                        cache->space_info->bytes_reserved -= buf->len;
+                        cache->space_info->reservation_progress++;
                        spin_unlock(&cache->space_info->lock);
                }
                goto out;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd3f172e94e6..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
        while (!end) {
-                off = extent_map_end(em);
+                u64 offset_in_extent;
-                if (off >= max)
-                        end = 1;
+                /* break if the extent we found is outside the range */
+                if (em->start >= max || extent_map_end(em) < off)
+                        break;
+                /*
+                 * get_extent may return an extent that starts before our
+                 * requested range.  We have to make sure the ranges
+                 * we return to fiemap always move forward and don't
+                 * overlap, so adjust the offsets here
+                 */
+                em_start = max(em->start, off);
-                em_start = em->start;
+                /*
-                em_len = em->len;
+                 * record the offset from the start of the extent
+                 * for adjusting the disk offset below
+                 */
+                offset_in_extent = em_start - em->start;
                em_end = extent_map_end(em);
+                em_len = em_end - em_start;
                emflags = em->flags;
                disko = 0;
                flags = 0;
+                /*
+                 * bump off for our next call to get_extent
+                 */
+                off = extent_map_end(em);
+                if (off >= max)
+                        end = 1;
                if (em->block_start == EXTENT_MAP_LAST_BYTE) {
                        end = 1;
                        flags |= FIEMAP_EXTENT_LAST;
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        flags |= (FIEMAP_EXTENT_DELALLOC |
                                  FIEMAP_EXTENT_UNKNOWN);
                } else {
-                        disko = em->block_start;
+                        disko = em->block_start + offset_in_extent;
                }
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d5940..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
+                /*
+                 * if we get a partial write, we can end up with
+                 * partially up to date pages.  These add
+                 * a lot of complexity, so make sure they don't
+                 * happen by forcing this copy to be retried.
+                 *
+                 * The rest of the btrfs_file_write code will fall
+                 * back to page at a time copies after we return 0.
+                 */
+                if (!PageUptodate(page) && copied < count)
+                        copied = 0;
                iov_iter_advance(i, copied);
                write_bytes -= copied;
                total_copied += copied;
@@ -763,6 +776,27 @@ out:
 }
 /*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+        int ret = 0;
+        if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+                ret = btrfs_readpage(NULL, page);
+                if (ret)
+                        return ret;
+                lock_page(page);
+                if (!PageUptodate(page)) {
+                        unlock_page(page);
+                        return -EIO;
+                }
+        }
+        return 0;
+}
+/*
 * this gets pages into the page cache and locks them down, it also properly
 * waits for data=ordered extents to finish before allowing the pages to be
 * modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
        unsigned long index = pos >> PAGE_CACHE_SHIFT;
        struct inode *inode = fdentry(file)->d_inode;
        int err = 0;
+        int faili = 0;
        u64 start_pos;
        u64 last_pos;
@@ -794,15 +829,24 @@ again:
        for (i = 0; i < num_pages; i++) {
                pages[i] = grab_cache_page(inode->i_mapping, index + i);
                if (!pages[i]) {
-                        int c;
+                        faili = i - 1;
-                        for (c = i - 1; c >= 0; c--) {
+                        err = -ENOMEM;
-                                unlock_page(pages[c]);
+                        goto fail;
-                                page_cache_release(pages[c]);
+                }
-                        }
-                        return -ENOMEM;
+                if (i == 0)
+                        err = prepare_uptodate_page(pages[i], pos);
+                if (i == num_pages - 1)
+                        err = prepare_uptodate_page(pages[i],
+                                                    pos + write_bytes);
+                if (err) {
+                        page_cache_release(pages[i]);
+                        faili = i - 1;
+                        goto fail;
                }
                wait_on_page_writeback(pages[i]);
        }
+        err = 0;
        if (start_pos < inode->i_size) {
                struct btrfs_ordered_extent *ordered;
                lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
                WARN_ON(!PageLocked(pages[i]));
        }
        return 0;
+fail:
+        while (faili >= 0) {
+                unlock_page(pages[faili]);
+                page_cache_release(pages[faili]);
+                faili--;
+        }
+        return err;
 }
 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-        struct page *pinned[2];
        struct page **pages = NULL;
        struct iov_iter i;
        loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
                      (file->f_flags & O_DIRECT));
-        pinned[0] = NULL;
-        pinned[1] = NULL;
        start_pos = pos;
        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        first_index = pos >> PAGE_CACHE_SHIFT;
        last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
-        /*
-         * there are lots of better ways to do this, but this code
-         * makes sure the first and last page in the file range are
-         * up to date and ready for cow
-         */
-        if ((pos & (PAGE_CACHE_SIZE - 1))) {
-                pinned[0] = grab_cache_page(inode->i_mapping, first_index);
-                if (!PageUptodate(pinned[0])) {
-                        ret = btrfs_readpage(NULL, pinned[0]);
-                        BUG_ON(ret);
-                        wait_on_page_locked(pinned[0]);
-                } else {
-                        unlock_page(pinned[0]);
-                }
-        }
-        if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
-                pinned[1] = grab_cache_page(inode->i_mapping, last_index);
-                if (!PageUptodate(pinned[1])) {
-                        ret = btrfs_readpage(NULL, pinned[1]);
-                        BUG_ON(ret);
-                        wait_on_page_locked(pinned[1]);
-                } else {
-                        unlock_page(pinned[1]);
-                }
-        }
        while (iov_iter_count(&i) > 0) {
                size_t offset = pos & (PAGE_CACHE_SIZE - 1);
                size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                copied = btrfs_copy_from_user(pos, num_pages,
                                           write_bytes, pages, &i);
-                dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
-                                PAGE_CACHE_SHIFT;
+                /*
+                 * if we have trouble faulting in the pages, fall
+                 * back to one page at a time
+                 */
+                if (copied < write_bytes)
+                        nrptrs = 1;
+                if (copied == 0)
+                        dirty_pages = 0;
+                else
+                        dirty_pages = (copied + offset +
+                                       PAGE_CACHE_SIZE - 1) >>
+                                       PAGE_CACHE_SHIFT;
                if (num_pages > dirty_pages) {
                        if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
                err = ret;
        kfree(pages);
-        if (pinned[0])
-                page_cache_release(pinned[0]);
-        if (pinned[1])
-                page_cache_release(pinned[1]);
        *ppos = pos;
        /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0efdb65953c5..9007bbd01dbf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4821,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                goto fail;
        /*
-         * 1 item for inode ref
+         * 2 items for inode and inode ref
         * 2 items for dir items
+         * 1 item for parent inode
         */
-        trans = btrfs_start_transaction(root, 3);
+        trans = btrfs_start_transaction(root, 5);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto fail;
@@ -6056,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        if (!skip_sum) {
                dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
                if (!dip->csums) {
+                        kfree(dip);
                        ret = -ENOMEM;
                        goto free_ordered;
                }
author	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-13 19:00:49 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-13 19:00:49 -0400
commit	0e5b88cd9975dca6c191cc9bd11f233fac4ca882 (patch)
tree	6302317acfb24d6b5ad7924aceaa0dd1c4a7a371 /fs
parent	eebea5d13d391981061d6ef069a841002eba4a7a (diff)
parent	36e39c40b3facc9b489a13f1d301fc53ff6960a3 (diff)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6f820fa23df4..7f78cc78fdd0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729	u64 disk_total; /* total bytes on disk, takes mirrors into	729	u64 disk_total; /* total bytes on disk, takes mirrors into
730	account */	730	account */
731		731
		732	/*
		733	* we bump reservation progress every time we decrement
		734	* bytes_reserved. This way people waiting for reservations
		735	* know something good has happened and they can check
		736	* for progress. The number here isn't to be trusted, it
		737	* just shows reclaim activity
		738	*/
		739	unsigned long reservation_progress;
		740
732	int full; /* indicates that we cannot allocate any more	741	int full; /* indicates that we cannot allocate any more
733	chunks for this space */	742	chunks for this space */
734	int force_alloc; /* set if we need to force a chunk alloc for	743	int force_alloc; /* set if we need to force a chunk alloc for


diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 588ff9849873..7b3089b5c2df 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342	u64 max_reclaim;	3342	u64 max_reclaim;
3343	u64 reclaimed = 0;	3343	u64 reclaimed = 0;
3344	long time_left;	3344	long time_left;
3345	int pause = 1;
3346	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;	3345	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347	int loops = 0;	3346	int loops = 0;
		3347	unsigned long progress;
3348		3348
3349	block_rsv = &root->fs_info->delalloc_block_rsv;	3349	block_rsv = &root->fs_info->delalloc_block_rsv;
3350	space_info = block_rsv->space_info;	3350	space_info = block_rsv->space_info;
3351		3351
3352	smp_mb();	3352	smp_mb();
3353	reserved = space_info->bytes_reserved;	3353	reserved = space_info->bytes_reserved;
		3354	progress = space_info->reservation_progress;
3354		3355
3355	if (reserved == 0)	3356	if (reserved == 0)
3356	return 0;	3357	return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365	writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);	3366	writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366		3367
3367	spin_lock(&space_info->lock);	3368	spin_lock(&space_info->lock);
3368	if (reserved > space_info->bytes_reserved) {	3369	if (reserved > space_info->bytes_reserved)
3369	loops = 0;
3370	reclaimed += reserved - space_info->bytes_reserved;	3370	reclaimed += reserved - space_info->bytes_reserved;
3371	} else {
3372	loops++;
3373	}
3374	reserved = space_info->bytes_reserved;	3371	reserved = space_info->bytes_reserved;
3375	spin_unlock(&space_info->lock);	3372	spin_unlock(&space_info->lock);
3376		3373
		3374	loops++;
		3375
3377	if (reserved == 0 || reclaimed >= max_reclaim)	3376	if (reserved == 0 || reclaimed >= max_reclaim)
3378	break;	3377	break;
3379		3378
3380	if (trans && trans->transaction->blocked)	3379	if (trans && trans->transaction->blocked)
3381	return -EAGAIN;	3380	return -EAGAIN;
3382		3381
3383	__set_current_state(TASK_INTERRUPTIBLE);	3382	time_left = schedule_timeout_interruptible(1);
3384	time_left = schedule_timeout(pause);
3385		3383
3386	/* We were interrupted, exit */	3384	/* We were interrupted, exit */
3387	if (time_left)	3385	if (time_left)
3388	break;	3386	break;
3389		3387
3390	pause <<= 1;	3388	/* we've kicked the IO a few times, if anything has been freed,
3391	if (pause > HZ / 10)	3389	* exit. There is no sense in looping here for a long time
3392	pause = HZ / 10;	3390	* when we really need to commit the transaction, or there are
		3391	* just too many writers without enough free space
		3392	*/
		3393
		3394	if (loops > 3) {
		3395	smp_mb();
		3396	if (progress != space_info->reservation_progress)
		3397	break;
		3398	}
3393		3399
3394	}	3400	}
3395	return reclaimed >= to_reclaim;	3401	return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612	if (num_bytes) {	3618	if (num_bytes) {
3613	spin_lock(&space_info->lock);	3619	spin_lock(&space_info->lock);
3614	space_info->bytes_reserved -= num_bytes;	3620	space_info->bytes_reserved -= num_bytes;
		3621	space_info->reservation_progress++;
3615	spin_unlock(&space_info->lock);	3622	spin_unlock(&space_info->lock);
3616	}	3623	}
3617	}	3624	}
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844	if (block_rsv->reserved >= block_rsv->size) {	3851	if (block_rsv->reserved >= block_rsv->size) {
3845	num_bytes = block_rsv->reserved - block_rsv->size;	3852	num_bytes = block_rsv->reserved - block_rsv->size;
3846	sinfo->bytes_reserved -= num_bytes;	3853	sinfo->bytes_reserved -= num_bytes;
		3854	sinfo->reservation_progress++;
3847	block_rsv->reserved = block_rsv->size;	3855	block_rsv->reserved = block_rsv->size;
3848	block_rsv->full = 1;	3856	block_rsv->full = 1;
3849	}	3857	}
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005	to_reserve = 0;	4013	to_reserve = 0;
4006	}	4014	}
4007	spin_unlock(&BTRFS_I(inode)->accounting_lock);	4015	spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009	to_reserve += calc_csum_metadata_size(inode, num_bytes);	4016	to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);	4017	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011	if (ret)	4018	if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133	btrfs_set_block_group_used(&cache->item, old_val);	4140	btrfs_set_block_group_used(&cache->item, old_val);
4134	cache->reserved -= num_bytes;	4141	cache->reserved -= num_bytes;
4135	cache->space_info->bytes_reserved -= num_bytes;	4142	cache->space_info->bytes_reserved -= num_bytes;
		4143	cache->space_info->reservation_progress++;
4136	cache->space_info->bytes_used += num_bytes;	4144	cache->space_info->bytes_used += num_bytes;
4137	cache->space_info->disk_used += num_bytes * factor;	4145	cache->space_info->disk_used += num_bytes * factor;
4138	spin_unlock(&cache->lock);	4146	spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184	if (reserved) {	4192	if (reserved) {
4185	cache->reserved -= num_bytes;	4193	cache->reserved -= num_bytes;
4186	cache->space_info->bytes_reserved -= num_bytes;	4194	cache->space_info->bytes_reserved -= num_bytes;
		4195	cache->space_info->reservation_progress++;
4187	}	4196	}
4188	spin_unlock(&cache->lock);	4197	spin_unlock(&cache->lock);
4189	spin_unlock(&cache->space_info->lock);	4198	spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234	space_info->bytes_readonly += num_bytes;	4243	space_info->bytes_readonly += num_bytes;
4235	cache->reserved -= num_bytes;	4244	cache->reserved -= num_bytes;
4236	space_info->bytes_reserved -= num_bytes;	4245	space_info->bytes_reserved -= num_bytes;
		4246	space_info->reservation_progress++;
4237	}	4247	}
4238	spin_unlock(&cache->lock);	4248	spin_unlock(&cache->lock);
4239	spin_unlock(&space_info->lock);	4249	spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712	if (ret) {	4722	if (ret) {
4713	spin_lock(&cache->space_info->lock);	4723	spin_lock(&cache->space_info->lock);
4714	cache->space_info->bytes_reserved -= buf->len;	4724	cache->space_info->bytes_reserved -= buf->len;
		4725	cache->space_info->reservation_progress++;
4715	spin_unlock(&cache->space_info->lock);	4726	spin_unlock(&cache->space_info->lock);
4716	}	4727	}
4717	goto out;	4728	goto out;


diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fd3f172e94e6..714adc4ac4c2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3046	}	3046	}
3047		3047
3048	while (!end) {	3048	while (!end) {
3049	off = extent_map_end(em);	3049	u64 offset_in_extent;
3050	if (off >= max)	3050
3051	end = 1;	3051	/* break if the extent we found is outside the range */
		3052	if (em->start >= max || extent_map_end(em) < off)
		3053	break;
		3054
		3055	/*
		3056	* get_extent may return an extent that starts before our
		3057	* requested range. We have to make sure the ranges
		3058	* we return to fiemap always move forward and don't
		3059	* overlap, so adjust the offsets here
		3060	*/
		3061	em_start = max(em->start, off);
3052		3062
3053	em_start = em->start;	3063	/*
3054	em_len = em->len;	3064	* record the offset from the start of the extent
		3065	* for adjusting the disk offset below
		3066	*/
		3067	offset_in_extent = em_start - em->start;
3055	em_end = extent_map_end(em);	3068	em_end = extent_map_end(em);
		3069	em_len = em_end - em_start;
3056	emflags = em->flags;	3070	emflags = em->flags;
3057	disko = 0;	3071	disko = 0;
3058	flags = 0;	3072	flags = 0;
3059		3073
		3074	/*
		3075	* bump off for our next call to get_extent
		3076	*/
		3077	off = extent_map_end(em);
		3078	if (off >= max)
		3079	end = 1;
		3080
3060	if (em->block_start == EXTENT_MAP_LAST_BYTE) {	3081	if (em->block_start == EXTENT_MAP_LAST_BYTE) {
3061	end = 1;	3082	end = 1;
3062	flags |= FIEMAP_EXTENT_LAST;	3083	flags |= FIEMAP_EXTENT_LAST;
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3067	flags |= (FIEMAP_EXTENT_DELALLOC |	3088	flags |= (FIEMAP_EXTENT_DELALLOC |
3068	FIEMAP_EXTENT_UNKNOWN);	3089	FIEMAP_EXTENT_UNKNOWN);
3069	} else {	3090	} else {
3070	disko = em->block_start;	3091	disko = em->block_start + offset_in_extent;
3071	}	3092	}
3072	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))	3093	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3073	flags \|= FIEMAP_EXTENT_ENCODED;	3094	flags \|= FIEMAP_EXTENT_ENCODED;


diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7084140d5940..f447b783bb84 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70		70
71	/* Flush processor's dcache for this page */	71	/* Flush processor's dcache for this page */
72	flush_dcache_page(page);	72	flush_dcache_page(page);
		73
		74	/*
		75	* if we get a partial write, we can end up with
		76	* partially up to date pages. These add
		77	* a lot of complexity, so make sure they don't
		78	* happen by forcing this copy to be retried.
		79	*
		80	* The rest of the btrfs_file_write code will fall
		81	* back to page at a time copies after we return 0.
		82	*/
		83	if (!PageUptodate(page) && copied < count)
		84	copied = 0;
		85
73	iov_iter_advance(i, copied);	86	iov_iter_advance(i, copied);
74	write_bytes -= copied;	87	write_bytes -= copied;
75	total_copied += copied;	88	total_copied += copied;
@@ -763,6 +776,27 @@ out:
763	}	776	}
764		777
765	/*	778	/*
		779	* on error we return an unlocked page and the error value
		780	* on success we return a locked page and 0
		781	*/
		782	static int prepare_uptodate_page(struct page *page, u64 pos)
		783	{
		784	int ret = 0;
		785
		786	if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
		787	ret = btrfs_readpage(NULL, page);
		788	if (ret)
		789	return ret;
		790	lock_page(page);
		791	if (!PageUptodate(page)) {
		792	unlock_page(page);
		793	return -EIO;
		794	}
		795	}
		796	return 0;
		797	}
		798
		799	/*
766	* this gets pages into the page cache and locks them down, it also properly	800	* this gets pages into the page cache and locks them down, it also properly
767	* waits for data=ordered extents to finish before allowing the pages to be	801	* waits for data=ordered extents to finish before allowing the pages to be
768	* modified.	802	* modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
777	unsigned long index = pos >> PAGE_CACHE_SHIFT;	811	unsigned long index = pos >> PAGE_CACHE_SHIFT;
778	struct inode *inode = fdentry(file)->d_inode;	812	struct inode *inode = fdentry(file)->d_inode;
779	int err = 0;	813	int err = 0;
		814	int faili = 0;
780	u64 start_pos;	815	u64 start_pos;
781	u64 last_pos;	816	u64 last_pos;
782		817
@@ -794,15 +829,24 @@ again:
794	for (i = 0; i < num_pages; i++) {	829	for (i = 0; i < num_pages; i++) {
795	pages[i] = grab_cache_page(inode->i_mapping, index + i);	830	pages[i] = grab_cache_page(inode->i_mapping, index + i);
796	if (!pages[i]) {	831	if (!pages[i]) {
797	int c;	832	faili = i - 1;
798	for (c = i - 1; c >= 0; c--) {	833	err = -ENOMEM;
799	unlock_page(pages[c]);	834	goto fail;
800	page_cache_release(pages[c]);	835	}
801	}	836
802	return -ENOMEM;	837	if (i == 0)
		838	err = prepare_uptodate_page(pages[i], pos);
		839	if (i == num_pages - 1)
		840	err = prepare_uptodate_page(pages[i],
		841	pos + write_bytes);
		842	if (err) {
		843	page_cache_release(pages[i]);
		844	faili = i - 1;
		845	goto fail;
803	}	846	}
804	wait_on_page_writeback(pages[i]);	847	wait_on_page_writeback(pages[i]);
805	}	848	}
		849	err = 0;
806	if (start_pos < inode->i_size) {	850	if (start_pos < inode->i_size) {
807	struct btrfs_ordered_extent *ordered;	851	struct btrfs_ordered_extent *ordered;
808	lock_extent_bits(&BTRFS_I(inode)->io_tree,	852	lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
842	WARN_ON(!PageLocked(pages[i]));	886	WARN_ON(!PageLocked(pages[i]));
843	}	887	}
844	return 0;	888	return 0;
		889	fail:
		890	while (faili >= 0) {
		891	unlock_page(pages[faili]);
		892	page_cache_release(pages[faili]);
		893	faili--;
		894	}
		895	return err;
		896
845	}	897	}
846		898
847	static ssize_t btrfs_file_aio_write(struct kiocb *iocb,	899	static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
851	struct file *file = iocb->ki_filp;	903	struct file *file = iocb->ki_filp;
852	struct inode *inode = fdentry(file)->d_inode;	904	struct inode *inode = fdentry(file)->d_inode;
853	struct btrfs_root *root = BTRFS_I(inode)->root;	905	struct btrfs_root *root = BTRFS_I(inode)->root;
854	struct page *pinned[2];
855	struct page **pages = NULL;	906	struct page **pages = NULL;
856	struct iov_iter i;	907	struct iov_iter i;
857	loff_t *ppos = &iocb->ki_pos;	908	loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
872	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||	923	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
873	(file->f_flags & O_DIRECT));	924	(file->f_flags & O_DIRECT));
874		925
875	pinned[0] = NULL;
876	pinned[1] = NULL;
877
878	start_pos = pos;	926	start_pos = pos;
879		927
880	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);	928	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
962	first_index = pos >> PAGE_CACHE_SHIFT;	1010	first_index = pos >> PAGE_CACHE_SHIFT;
963	last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;	1011	last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
964		1012
965	/*
966	* there are lots of better ways to do this, but this code
967	* makes sure the first and last page in the file range are
968	* up to date and ready for cow
969	*/
970	if ((pos & (PAGE_CACHE_SIZE - 1))) {
971	pinned[0] = grab_cache_page(inode->i_mapping, first_index);
972	if (!PageUptodate(pinned[0])) {
973	ret = btrfs_readpage(NULL, pinned[0]);
974	BUG_ON(ret);
975	wait_on_page_locked(pinned[0]);
976	} else {
977	unlock_page(pinned[0]);
978	}
979	}
980	if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
981	pinned[1] = grab_cache_page(inode->i_mapping, last_index);
982	if (!PageUptodate(pinned[1])) {
983	ret = btrfs_readpage(NULL, pinned[1]);
984	BUG_ON(ret);
985	wait_on_page_locked(pinned[1]);
986	} else {
987	unlock_page(pinned[1]);
988	}
989	}
990
991	while (iov_iter_count(&i) > 0) {	1013	while (iov_iter_count(&i) > 0) {
992	size_t offset = pos & (PAGE_CACHE_SIZE - 1);	1014	size_t offset = pos & (PAGE_CACHE_SIZE - 1);
993	size_t write_bytes = min(iov_iter_count(&i),	1015	size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1024		1046
1025	copied = btrfs_copy_from_user(pos, num_pages,	1047	copied = btrfs_copy_from_user(pos, num_pages,
1026	write_bytes, pages, &i);	1048	write_bytes, pages, &i);
1027	dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>	1049
1028	PAGE_CACHE_SHIFT;	1050	/*
		1051	* if we have trouble faulting in the pages, fall
		1052	* back to one page at a time
		1053	*/
		1054	if (copied < write_bytes)
		1055	nrptrs = 1;
		1056
		1057	if (copied == 0)
		1058	dirty_pages = 0;
		1059	else
		1060	dirty_pages = (copied + offset +
		1061	PAGE_CACHE_SIZE - 1) >>
		1062	PAGE_CACHE_SHIFT;
1029		1063
1030	if (num_pages > dirty_pages) {	1064	if (num_pages > dirty_pages) {
1031	if (copied > 0)	1065	if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
1069	err = ret;	1103	err = ret;
1070		1104
1071	kfree(pages);	1105	kfree(pages);
1072	if (pinned[0])
1073	page_cache_release(pinned[0]);
1074	if (pinned[1])
1075	page_cache_release(pinned[1]);
1076	*ppos = pos;	1106	*ppos = pos;
1077		1107
1078	/*	1108	/*


diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0efdb65953c5..9007bbd01dbf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4821,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4821	goto fail;	4821	goto fail;
4822		4822
4823	/*	4823	/*
4824	* 1 item for inode ref	4824	* 2 items for inode and inode ref
4825	* 2 items for dir items	4825	* 2 items for dir items
		4826	* 1 item for parent inode
4826	*/	4827	*/
4827	trans = btrfs_start_transaction(root, 3);	4828	trans = btrfs_start_transaction(root, 5);
4828	if (IS_ERR(trans)) {	4829	if (IS_ERR(trans)) {
4829	err = PTR_ERR(trans);	4830	err = PTR_ERR(trans);
4830	goto fail;	4831	goto fail;
@@ -6056,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6056	if (!skip_sum) {	6057	if (!skip_sum) {
6057	dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);	6058	dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6058	if (!dip->csums) {	6059	if (!dip->csums) {
		6060	kfree(dip);
6059	ret = -ENOMEM;	6061	ret = -ENOMEM;
6060	goto free_ordered;	6062	goto free_ordered;
6061	}	6063	}