aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-03-03 20:29:19 -0500
committerIngo Molnar <mingo@elte.hu>2009-03-03 20:29:19 -0500
commit91d75e209bd59695f0708d66964d928d45b3b2f3 (patch)
tree32cab1359d951e4193bebb181a0f0319824a2b95 /fs
parent9976b39b5031bbf76f715893cf080b6a17683881 (diff)
parent8b0e5860cb099d7958d13b00ffbc35ad02735700 (diff)
Merge branch 'x86/core' into core/percpu
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile6
-rw-r--r--fs/bio.c2
-rw-r--r--fs/btrfs/btrfs_inode.h8
-rw-r--r--fs/btrfs/ctree.h40
-rw-r--r--fs/btrfs/extent-tree.c252
-rw-r--r--fs/btrfs/file.c16
-rw-r--r--fs/btrfs/inode.c62
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/ialloc.c7
-rw-r--r--fs/ext4/inode.c11
-rw-r--r--fs/ext4/super.c1
-rw-r--r--fs/jffs2/background.c18
-rw-r--r--fs/jffs2/readinode.c42
-rw-r--r--fs/ocfs2/alloc.c27
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c12
-rw-r--r--fs/ocfs2/dlm/dlmthread.c3
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/dlmglue.c11
-rw-r--r--fs/ocfs2/ocfs2.h3
-rw-r--r--fs/ocfs2/super.c8
-rw-r--r--fs/ocfs2/xattr.c27
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/page.c2
26 files changed, 455 insertions, 126 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 38bc735c67ad..dc20db348679 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -69,10 +69,12 @@ obj-$(CONFIG_DLM) += dlm/
69# Do not add any filesystems before this line 69# Do not add any filesystems before this line
70obj-$(CONFIG_REISERFS_FS) += reiserfs/ 70obj-$(CONFIG_REISERFS_FS) += reiserfs/
71obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 71obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
72obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4 72obj-$(CONFIG_EXT2_FS) += ext2/
73# We place ext4 after ext2 so plain ext2 root fs's are mounted using ext2
74# unless explicitly requested by rootfstype
75obj-$(CONFIG_EXT4_FS) += ext4/
73obj-$(CONFIG_JBD) += jbd/ 76obj-$(CONFIG_JBD) += jbd/
74obj-$(CONFIG_JBD2) += jbd2/ 77obj-$(CONFIG_JBD2) += jbd2/
75obj-$(CONFIG_EXT2_FS) += ext2/
76obj-$(CONFIG_CRAMFS) += cramfs/ 78obj-$(CONFIG_CRAMFS) += cramfs/
77obj-$(CONFIG_SQUASHFS) += squashfs/ 79obj-$(CONFIG_SQUASHFS) += squashfs/
78obj-y += ramfs/ 80obj-y += ramfs/
diff --git a/fs/bio.c b/fs/bio.c
index 72ab251cdb9c..124b95c4d582 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -302,7 +302,7 @@ void bio_init(struct bio *bio)
302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
303{ 303{
304 struct bio *bio = NULL; 304 struct bio *bio = NULL;
305 void *p; 305 void *uninitialized_var(p);
306 306
307 if (bs) { 307 if (bs) {
308 p = mempool_alloc(bs->bio_pool, gfp_mask); 308 p = mempool_alloc(bs->bio_pool, gfp_mask);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index a8c9693b75ac..72677ce2b74f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,9 @@ struct btrfs_inode {
66 */ 66 */
67 struct list_head delalloc_inodes; 67 struct list_head delalloc_inodes;
68 68
69 /* the space_info for where this inode's data allocations are done */
70 struct btrfs_space_info *space_info;
71
69 /* full 64 bit generation number, struct vfs_inode doesn't have a big 72 /* full 64 bit generation number, struct vfs_inode doesn't have a big
70 * enough field for this. 73 * enough field for this.
71 */ 74 */
@@ -94,6 +97,11 @@ struct btrfs_inode {
94 */ 97 */
95 u64 delalloc_bytes; 98 u64 delalloc_bytes;
96 99
100 /* total number of bytes that may be used for this inode for
101 * delalloc
102 */
103 u64 reserved_bytes;
104
97 /* 105 /*
98 * the size of the file stored in the metadata on disk. data=ordered 106 * the size of the file stored in the metadata on disk. data=ordered
99 * means the in-memory i_size might be larger than the size on disk 107 * means the in-memory i_size might be larger than the size on disk
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 766b31ae3186..82491ba8fa40 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -596,13 +596,27 @@ struct btrfs_block_group_item {
596 596
597struct btrfs_space_info { 597struct btrfs_space_info {
598 u64 flags; 598 u64 flags;
599 u64 total_bytes; 599
600 u64 bytes_used; 600 u64 total_bytes; /* total bytes in the space */
601 u64 bytes_pinned; 601 u64 bytes_used; /* total bytes used on disk */
602 u64 bytes_reserved; 602 u64 bytes_pinned; /* total bytes pinned, will be freed when the
603 u64 bytes_readonly; 603 transaction finishes */
604 int full; 604 u64 bytes_reserved; /* total bytes the allocator has reserved for
605 int force_alloc; 605 current allocations */
606 u64 bytes_readonly; /* total bytes that are read only */
607
608 /* delalloc accounting */
609 u64 bytes_delalloc; /* number of bytes reserved for allocation,
610 this space is not necessarily reserved yet
611 by the allocator */
612 u64 bytes_may_use; /* number of bytes that may be used for
613 delalloc */
614
615 int full; /* indicates that we cannot allocate any more
616 chunks for this space */
617 int force_alloc; /* set if we need to force a chunk alloc for
618 this space */
619
606 struct list_head list; 620 struct list_head list;
607 621
608 /* for block groups in our same type */ 622 /* for block groups in our same type */
@@ -1782,6 +1796,16 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root);
1782int btrfs_cleanup_reloc_trees(struct btrfs_root *root); 1796int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
1783int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); 1797int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
1784u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 1798u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
1799void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
1800int btrfs_check_metadata_free_space(struct btrfs_root *root);
1801int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
1802 u64 bytes);
1803void btrfs_free_reserved_data_space(struct btrfs_root *root,
1804 struct inode *inode, u64 bytes);
1805void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
1806 u64 bytes);
1807void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1808 u64 bytes);
1785/* ctree.c */ 1809/* ctree.c */
1786int btrfs_previous_item(struct btrfs_root *root, 1810int btrfs_previous_item(struct btrfs_root *root,
1787 struct btrfs_path *path, u64 min_objectid, 1811 struct btrfs_path *path, u64 min_objectid,
@@ -2027,8 +2051,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2027unsigned long btrfs_force_ra(struct address_space *mapping, 2051unsigned long btrfs_force_ra(struct address_space *mapping,
2028 struct file_ra_state *ra, struct file *file, 2052 struct file_ra_state *ra, struct file *file,
2029 pgoff_t offset, pgoff_t last_index); 2053 pgoff_t offset, pgoff_t last_index);
2030int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
2031 int for_del);
2032int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); 2054int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
2033int btrfs_readpage(struct file *file, struct page *page); 2055int btrfs_readpage(struct file *file, struct page *page);
2034void btrfs_delete_inode(struct inode *inode); 2056void btrfs_delete_inode(struct inode *inode);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0a5d796c9f7e..6b5966aacf44 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,6 +60,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
60 u64 bytenr, u64 num_bytes, int alloc, 60 u64 bytenr, u64 num_bytes, int alloc,
61 int mark_free); 61 int mark_free);
62 62
63static int do_chunk_alloc(struct btrfs_trans_handle *trans,
64 struct btrfs_root *extent_root, u64 alloc_bytes,
65 u64 flags, int force);
66
63static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 67static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
64{ 68{
65 return (cache->flags & bits) == bits; 69 return (cache->flags & bits) == bits;
@@ -1909,6 +1913,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1909 found->bytes_pinned = 0; 1913 found->bytes_pinned = 0;
1910 found->bytes_reserved = 0; 1914 found->bytes_reserved = 0;
1911 found->bytes_readonly = 0; 1915 found->bytes_readonly = 0;
1916 found->bytes_delalloc = 0;
1912 found->full = 0; 1917 found->full = 0;
1913 found->force_alloc = 0; 1918 found->force_alloc = 0;
1914 *space_info = found; 1919 *space_info = found;
@@ -1972,6 +1977,233 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1972 return flags; 1977 return flags;
1973} 1978}
1974 1979
1980static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
1981{
1982 struct btrfs_fs_info *info = root->fs_info;
1983 u64 alloc_profile;
1984
1985 if (data) {
1986 alloc_profile = info->avail_data_alloc_bits &
1987 info->data_alloc_profile;
1988 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1989 } else if (root == root->fs_info->chunk_root) {
1990 alloc_profile = info->avail_system_alloc_bits &
1991 info->system_alloc_profile;
1992 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1993 } else {
1994 alloc_profile = info->avail_metadata_alloc_bits &
1995 info->metadata_alloc_profile;
1996 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1997 }
1998
1999 return btrfs_reduce_alloc_profile(root, data);
2000}
2001
2002void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2003{
2004 u64 alloc_target;
2005
2006 alloc_target = btrfs_get_alloc_profile(root, 1);
2007 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
2008 alloc_target);
2009}
2010
2011/*
2012 * for now this just makes sure we have at least 5% of our metadata space free
2013 * for use once the space is full; until then a chunk alloc is tried when usage passes 80%.
2014 */
2015int btrfs_check_metadata_free_space(struct btrfs_root *root)
2016{
2017 struct btrfs_fs_info *info = root->fs_info;
2018 struct btrfs_space_info *meta_sinfo;
2019 u64 alloc_target, thresh;
2020 int committed = 0, ret;
2021
2022 /* get the space info for where the metadata will live */
2023 alloc_target = btrfs_get_alloc_profile(root, 0);
2024 meta_sinfo = __find_space_info(info, alloc_target);
2025
2026again:
2027 spin_lock(&meta_sinfo->lock);
2028 if (!meta_sinfo->full)
2029 thresh = meta_sinfo->total_bytes * 80;
2030 else
2031 thresh = meta_sinfo->total_bytes * 95;
2032
2033 do_div(thresh, 100);
2034
2035 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2036 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
2037 struct btrfs_trans_handle *trans;
2038 if (!meta_sinfo->full) {
2039 meta_sinfo->force_alloc = 1;
2040 spin_unlock(&meta_sinfo->lock);
2041
2042 trans = btrfs_start_transaction(root, 1);
2043 if (!trans)
2044 return -ENOMEM;
2045
2046 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2047 2 * 1024 * 1024, alloc_target, 0);
2048 btrfs_end_transaction(trans, root);
2049 goto again;
2050 }
2051 spin_unlock(&meta_sinfo->lock);
2052
2053 if (!committed) {
2054 committed = 1;
2055 trans = btrfs_join_transaction(root, 1);
2056 if (!trans)
2057 return -ENOMEM;
2058 ret = btrfs_commit_transaction(trans, root);
2059 if (ret)
2060 return ret;
2061 goto again;
2062 }
2063 return -ENOSPC;
2064 }
2065 spin_unlock(&meta_sinfo->lock);
2066
2067 return 0;
2068}
2069
2070/*
2071 * This will check the space that the inode allocates from to make sure we have
2072 * enough space for bytes.
2073 */
2074int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2075 u64 bytes)
2076{
2077 struct btrfs_space_info *data_sinfo;
2078 int ret = 0, committed = 0;
2079
2080 /* make sure bytes are sectorsize aligned */
2081 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2082
2083 data_sinfo = BTRFS_I(inode)->space_info;
2084again:
2085 /* make sure we have enough space to handle the data first */
2086 spin_lock(&data_sinfo->lock);
2087 if (data_sinfo->total_bytes - data_sinfo->bytes_used -
2088 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
2089 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
2090 data_sinfo->bytes_may_use < bytes) {
2091 struct btrfs_trans_handle *trans;
2092
2093 /*
2094 * if we don't have enough free bytes in this space then we need
2095 * to alloc a new chunk.
2096 */
2097 if (!data_sinfo->full) {
2098 u64 alloc_target;
2099
2100 data_sinfo->force_alloc = 1;
2101 spin_unlock(&data_sinfo->lock);
2102
2103 alloc_target = btrfs_get_alloc_profile(root, 1);
2104 trans = btrfs_start_transaction(root, 1);
2105 if (!trans)
2106 return -ENOMEM;
2107
2108 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2109 bytes + 2 * 1024 * 1024,
2110 alloc_target, 0);
2111 btrfs_end_transaction(trans, root);
2112 if (ret)
2113 return ret;
2114 goto again;
2115 }
2116 spin_unlock(&data_sinfo->lock);
2117
2118 /* commit the current transaction and try again */
2119 if (!committed) {
2120 committed = 1;
2121 trans = btrfs_join_transaction(root, 1);
2122 if (!trans)
2123 return -ENOMEM;
2124 ret = btrfs_commit_transaction(trans, root);
2125 if (ret)
2126 return ret;
2127 goto again;
2128 }
2129
2130 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2131 ", %llu bytes_used, %llu bytes_reserved, "
2132 "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
2133 "%llu total\n", bytes, data_sinfo->bytes_delalloc,
2134 data_sinfo->bytes_used, data_sinfo->bytes_reserved,
2135 data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
2136 data_sinfo->bytes_may_use, data_sinfo->total_bytes);
2137 return -ENOSPC;
2138 }
2139 data_sinfo->bytes_may_use += bytes;
2140 BTRFS_I(inode)->reserved_bytes += bytes;
2141 spin_unlock(&data_sinfo->lock);
2142
2143 return btrfs_check_metadata_free_space(root);
2144}
2145
2146/*
2147 * if there was an error for whatever reason after calling
2148 * btrfs_check_data_free_space, call this so we can clean up the counters.
2149 */
2150void btrfs_free_reserved_data_space(struct btrfs_root *root,
2151 struct inode *inode, u64 bytes)
2152{
2153 struct btrfs_space_info *data_sinfo;
2154
2155 /* make sure bytes are sectorsize aligned */
2156 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2157
2158 data_sinfo = BTRFS_I(inode)->space_info;
2159 spin_lock(&data_sinfo->lock);
2160 data_sinfo->bytes_may_use -= bytes;
2161 BTRFS_I(inode)->reserved_bytes -= bytes;
2162 spin_unlock(&data_sinfo->lock);
2163}
2164
2165/* called when we are adding a delalloc extent to the inode's io_tree */
2166void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
2167 u64 bytes)
2168{
2169 struct btrfs_space_info *data_sinfo;
2170
2171 /* get the space info for where this inode will be storing its data */
2172 data_sinfo = BTRFS_I(inode)->space_info;
2173
2174 /* account the new delalloc bytes while holding the space_info lock */
2175 spin_lock(&data_sinfo->lock);
2176 data_sinfo->bytes_delalloc += bytes;
2177
2178 /*
2179 * we are adding a delalloc extent without calling
2180 * btrfs_check_data_free_space first. This happens on a weird
2181 * writepage condition, but shouldn't hurt our accounting
2182 */
2183 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
2184 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
2185 BTRFS_I(inode)->reserved_bytes = 0;
2186 } else {
2187 data_sinfo->bytes_may_use -= bytes;
2188 BTRFS_I(inode)->reserved_bytes -= bytes;
2189 }
2190
2191 spin_unlock(&data_sinfo->lock);
2192}
2193
2194/* called when we are clearing a delalloc extent from the inode's io_tree */
2195void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
2196 u64 bytes)
2197{
2198 struct btrfs_space_info *info;
2199
2200 info = BTRFS_I(inode)->space_info;
2201
2202 spin_lock(&info->lock);
2203 info->bytes_delalloc -= bytes;
2204 spin_unlock(&info->lock);
2205}
2206
1975static int do_chunk_alloc(struct btrfs_trans_handle *trans, 2207static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1976 struct btrfs_root *extent_root, u64 alloc_bytes, 2208 struct btrfs_root *extent_root, u64 alloc_bytes,
1977 u64 flags, int force) 2209 u64 flags, int force)
@@ -3105,6 +3337,10 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3105 (unsigned long long)(info->total_bytes - info->bytes_used - 3337 (unsigned long long)(info->total_bytes - info->bytes_used -
3106 info->bytes_pinned - info->bytes_reserved), 3338 info->bytes_pinned - info->bytes_reserved),
3107 (info->full) ? "" : "not "); 3339 (info->full) ? "" : "not ");
3340 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
3341 " may_use=%llu, used=%llu\n", info->total_bytes,
3342 info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
3343 info->bytes_used);
3108 3344
3109 down_read(&info->groups_sem); 3345 down_read(&info->groups_sem);
3110 list_for_each_entry(cache, &info->block_groups, list) { 3346 list_for_each_entry(cache, &info->block_groups, list) {
@@ -3131,24 +3367,10 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3131{ 3367{
3132 int ret; 3368 int ret;
3133 u64 search_start = 0; 3369 u64 search_start = 0;
3134 u64 alloc_profile;
3135 struct btrfs_fs_info *info = root->fs_info; 3370 struct btrfs_fs_info *info = root->fs_info;
3136 3371
3137 if (data) { 3372 data = btrfs_get_alloc_profile(root, data);
3138 alloc_profile = info->avail_data_alloc_bits &
3139 info->data_alloc_profile;
3140 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
3141 } else if (root == root->fs_info->chunk_root) {
3142 alloc_profile = info->avail_system_alloc_bits &
3143 info->system_alloc_profile;
3144 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
3145 } else {
3146 alloc_profile = info->avail_metadata_alloc_bits &
3147 info->metadata_alloc_profile;
3148 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
3149 }
3150again: 3373again:
3151 data = btrfs_reduce_alloc_profile(root, data);
3152 /* 3374 /*
3153 * the only place that sets empty_size is btrfs_realloc_node, which 3375 * the only place that sets empty_size is btrfs_realloc_node, which
3154 * is not called recursively on allocations 3376 * is not called recursively on allocations
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 872f104576e5..dc78954861b3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1091,19 +1091,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1091 WARN_ON(num_pages > nrptrs); 1091 WARN_ON(num_pages > nrptrs);
1092 memset(pages, 0, sizeof(struct page *) * nrptrs); 1092 memset(pages, 0, sizeof(struct page *) * nrptrs);
1093 1093
1094 ret = btrfs_check_free_space(root, write_bytes, 0); 1094 ret = btrfs_check_data_free_space(root, inode, write_bytes);
1095 if (ret) 1095 if (ret)
1096 goto out; 1096 goto out;
1097 1097
1098 ret = prepare_pages(root, file, pages, num_pages, 1098 ret = prepare_pages(root, file, pages, num_pages,
1099 pos, first_index, last_index, 1099 pos, first_index, last_index,
1100 write_bytes); 1100 write_bytes);
1101 if (ret) 1101 if (ret) {
1102 btrfs_free_reserved_data_space(root, inode,
1103 write_bytes);
1102 goto out; 1104 goto out;
1105 }
1103 1106
1104 ret = btrfs_copy_from_user(pos, num_pages, 1107 ret = btrfs_copy_from_user(pos, num_pages,
1105 write_bytes, pages, buf); 1108 write_bytes, pages, buf);
1106 if (ret) { 1109 if (ret) {
1110 btrfs_free_reserved_data_space(root, inode,
1111 write_bytes);
1107 btrfs_drop_pages(pages, num_pages); 1112 btrfs_drop_pages(pages, num_pages);
1108 goto out; 1113 goto out;
1109 } 1114 }
@@ -1111,8 +1116,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1111 ret = dirty_and_release_pages(NULL, root, file, pages, 1116 ret = dirty_and_release_pages(NULL, root, file, pages,
1112 num_pages, pos, write_bytes); 1117 num_pages, pos, write_bytes);
1113 btrfs_drop_pages(pages, num_pages); 1118 btrfs_drop_pages(pages, num_pages);
1114 if (ret) 1119 if (ret) {
1120 btrfs_free_reserved_data_space(root, inode,
1121 write_bytes);
1115 goto out; 1122 goto out;
1123 }
1116 1124
1117 if (will_write) { 1125 if (will_write) {
1118 btrfs_fdatawrite_range(inode->i_mapping, pos, 1126 btrfs_fdatawrite_range(inode->i_mapping, pos,
@@ -1136,6 +1144,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1136 } 1144 }
1137out: 1145out:
1138 mutex_unlock(&inode->i_mutex); 1146 mutex_unlock(&inode->i_mutex);
1147 if (ret)
1148 err = ret;
1139 1149
1140out_nolock: 1150out_nolock:
1141 kfree(pages); 1151 kfree(pages);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3cee77ae03c8..7d4f948bc22a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -102,34 +102,6 @@ static int btrfs_init_inode_security(struct inode *inode, struct inode *dir)
102} 102}
103 103
104/* 104/*
105 * a very lame attempt at stopping writes when the FS is 85% full. There
106 * are countless ways this is incorrect, but it is better than nothing.
107 */
108int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
109 int for_del)
110{
111 u64 total;
112 u64 used;
113 u64 thresh;
114 int ret = 0;
115
116 spin_lock(&root->fs_info->delalloc_lock);
117 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
118 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
119 if (for_del)
120 thresh = total * 90;
121 else
122 thresh = total * 85;
123
124 do_div(thresh, 100);
125
126 if (used + root->fs_info->delalloc_bytes + num_required > thresh)
127 ret = -ENOSPC;
128 spin_unlock(&root->fs_info->delalloc_lock);
129 return ret;
130}
131
132/*
133 * this does all the hard work for inserting an inline extent into 105 * this does all the hard work for inserting an inline extent into
134 * the btree. The caller should have done a btrfs_drop_extents so that 106 * the btree. The caller should have done a btrfs_drop_extents so that
135 * no overlapping inline items exist in the btree 107 * no overlapping inline items exist in the btree
@@ -1190,6 +1162,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1190 */ 1162 */
1191 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1163 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
1192 struct btrfs_root *root = BTRFS_I(inode)->root; 1164 struct btrfs_root *root = BTRFS_I(inode)->root;
1165 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1193 spin_lock(&root->fs_info->delalloc_lock); 1166 spin_lock(&root->fs_info->delalloc_lock);
1194 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1167 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
1195 root->fs_info->delalloc_bytes += end - start + 1; 1168 root->fs_info->delalloc_bytes += end - start + 1;
@@ -1223,9 +1196,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
1223 (unsigned long long)end - start + 1, 1196 (unsigned long long)end - start + 1,
1224 (unsigned long long) 1197 (unsigned long long)
1225 root->fs_info->delalloc_bytes); 1198 root->fs_info->delalloc_bytes);
1199 btrfs_delalloc_free_space(root, inode, (u64)-1);
1226 root->fs_info->delalloc_bytes = 0; 1200 root->fs_info->delalloc_bytes = 0;
1227 BTRFS_I(inode)->delalloc_bytes = 0; 1201 BTRFS_I(inode)->delalloc_bytes = 0;
1228 } else { 1202 } else {
1203 btrfs_delalloc_free_space(root, inode,
1204 end - start + 1);
1229 root->fs_info->delalloc_bytes -= end - start + 1; 1205 root->fs_info->delalloc_bytes -= end - start + 1;
1230 BTRFS_I(inode)->delalloc_bytes -= end - start + 1; 1206 BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
1231 } 1207 }
@@ -2245,10 +2221,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2245 2221
2246 root = BTRFS_I(dir)->root; 2222 root = BTRFS_I(dir)->root;
2247 2223
2248 ret = btrfs_check_free_space(root, 1, 1);
2249 if (ret)
2250 goto fail;
2251
2252 trans = btrfs_start_transaction(root, 1); 2224 trans = btrfs_start_transaction(root, 1);
2253 2225
2254 btrfs_set_trans_block_group(trans, dir); 2226 btrfs_set_trans_block_group(trans, dir);
@@ -2261,7 +2233,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2261 nr = trans->blocks_used; 2233 nr = trans->blocks_used;
2262 2234
2263 btrfs_end_transaction_throttle(trans, root); 2235 btrfs_end_transaction_throttle(trans, root);
2264fail:
2265 btrfs_btree_balance_dirty(root, nr); 2236 btrfs_btree_balance_dirty(root, nr);
2266 return ret; 2237 return ret;
2267} 2238}
@@ -2284,10 +2255,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2284 return -ENOTEMPTY; 2255 return -ENOTEMPTY;
2285 } 2256 }
2286 2257
2287 ret = btrfs_check_free_space(root, 1, 1);
2288 if (ret)
2289 goto fail;
2290
2291 trans = btrfs_start_transaction(root, 1); 2258 trans = btrfs_start_transaction(root, 1);
2292 btrfs_set_trans_block_group(trans, dir); 2259 btrfs_set_trans_block_group(trans, dir);
2293 2260
@@ -2304,7 +2271,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2304fail_trans: 2271fail_trans:
2305 nr = trans->blocks_used; 2272 nr = trans->blocks_used;
2306 ret = btrfs_end_transaction_throttle(trans, root); 2273 ret = btrfs_end_transaction_throttle(trans, root);
2307fail:
2308 btrfs_btree_balance_dirty(root, nr); 2274 btrfs_btree_balance_dirty(root, nr);
2309 2275
2310 if (ret && !err) 2276 if (ret && !err)
@@ -2818,7 +2784,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2818 if (size <= hole_start) 2784 if (size <= hole_start)
2819 return 0; 2785 return 0;
2820 2786
2821 err = btrfs_check_free_space(root, 1, 0); 2787 err = btrfs_check_metadata_free_space(root);
2822 if (err) 2788 if (err)
2823 return err; 2789 return err;
2824 2790
@@ -3014,6 +2980,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3014 bi->last_trans = 0; 2980 bi->last_trans = 0;
3015 bi->logged_trans = 0; 2981 bi->logged_trans = 0;
3016 bi->delalloc_bytes = 0; 2982 bi->delalloc_bytes = 0;
2983 bi->reserved_bytes = 0;
3017 bi->disk_i_size = 0; 2984 bi->disk_i_size = 0;
3018 bi->flags = 0; 2985 bi->flags = 0;
3019 bi->index_cnt = (u64)-1; 2986 bi->index_cnt = (u64)-1;
@@ -3035,6 +3002,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
3035 inode->i_ino = args->ino; 3002 inode->i_ino = args->ino;
3036 init_btrfs_i(inode); 3003 init_btrfs_i(inode);
3037 BTRFS_I(inode)->root = args->root; 3004 BTRFS_I(inode)->root = args->root;
3005 btrfs_set_inode_space_info(args->root, inode);
3038 return 0; 3006 return 0;
3039} 3007}
3040 3008
@@ -3455,6 +3423,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3455 BTRFS_I(inode)->index_cnt = 2; 3423 BTRFS_I(inode)->index_cnt = 2;
3456 BTRFS_I(inode)->root = root; 3424 BTRFS_I(inode)->root = root;
3457 BTRFS_I(inode)->generation = trans->transid; 3425 BTRFS_I(inode)->generation = trans->transid;
3426 btrfs_set_inode_space_info(root, inode);
3458 3427
3459 if (mode & S_IFDIR) 3428 if (mode & S_IFDIR)
3460 owner = 0; 3429 owner = 0;
@@ -3602,7 +3571,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
3602 if (!new_valid_dev(rdev)) 3571 if (!new_valid_dev(rdev))
3603 return -EINVAL; 3572 return -EINVAL;
3604 3573
3605 err = btrfs_check_free_space(root, 1, 0); 3574 err = btrfs_check_metadata_free_space(root);
3606 if (err) 3575 if (err)
3607 goto fail; 3576 goto fail;
3608 3577
@@ -3665,7 +3634,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
3665 u64 objectid; 3634 u64 objectid;
3666 u64 index = 0; 3635 u64 index = 0;
3667 3636
3668 err = btrfs_check_free_space(root, 1, 0); 3637 err = btrfs_check_metadata_free_space(root);
3669 if (err) 3638 if (err)
3670 goto fail; 3639 goto fail;
3671 trans = btrfs_start_transaction(root, 1); 3640 trans = btrfs_start_transaction(root, 1);
@@ -3733,7 +3702,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
3733 return -ENOENT; 3702 return -ENOENT;
3734 3703
3735 btrfs_inc_nlink(inode); 3704 btrfs_inc_nlink(inode);
3736 err = btrfs_check_free_space(root, 1, 0); 3705 err = btrfs_check_metadata_free_space(root);
3737 if (err) 3706 if (err)
3738 goto fail; 3707 goto fail;
3739 err = btrfs_set_inode_index(dir, &index); 3708 err = btrfs_set_inode_index(dir, &index);
@@ -3779,7 +3748,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3779 u64 index = 0; 3748 u64 index = 0;
3780 unsigned long nr = 1; 3749 unsigned long nr = 1;
3781 3750
3782 err = btrfs_check_free_space(root, 1, 0); 3751 err = btrfs_check_metadata_free_space(root);
3783 if (err) 3752 if (err)
3784 goto out_unlock; 3753 goto out_unlock;
3785 3754
@@ -4336,7 +4305,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4336 u64 page_start; 4305 u64 page_start;
4337 u64 page_end; 4306 u64 page_end;
4338 4307
4339 ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); 4308 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
4340 if (ret) 4309 if (ret)
4341 goto out; 4310 goto out;
4342 4311
@@ -4349,6 +4318,7 @@ again:
4349 4318
4350 if ((page->mapping != inode->i_mapping) || 4319 if ((page->mapping != inode->i_mapping) ||
4351 (page_start >= size)) { 4320 (page_start >= size)) {
4321 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
4352 /* page got truncated out from underneath us */ 4322 /* page got truncated out from underneath us */
4353 goto out_unlock; 4323 goto out_unlock;
4354 } 4324 }
@@ -4631,7 +4601,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4631 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 4601 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
4632 return -EXDEV; 4602 return -EXDEV;
4633 4603
4634 ret = btrfs_check_free_space(root, 1, 0); 4604 ret = btrfs_check_metadata_free_space(root);
4635 if (ret) 4605 if (ret)
4636 goto out_unlock; 4606 goto out_unlock;
4637 4607
@@ -4749,7 +4719,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
4749 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 4719 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
4750 return -ENAMETOOLONG; 4720 return -ENAMETOOLONG;
4751 4721
4752 err = btrfs_check_free_space(root, 1, 0); 4722 err = btrfs_check_metadata_free_space(root);
4753 if (err) 4723 if (err)
4754 goto out_fail; 4724 goto out_fail;
4755 4725
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 988fdc8b49eb..bca729fc80c8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -70,7 +70,7 @@ static noinline int create_subvol(struct btrfs_root *root,
70 u64 index = 0; 70 u64 index = 0;
71 unsigned long nr = 1; 71 unsigned long nr = 1;
72 72
73 ret = btrfs_check_free_space(root, 1, 0); 73 ret = btrfs_check_metadata_free_space(root);
74 if (ret) 74 if (ret)
75 goto fail_commit; 75 goto fail_commit;
76 76
@@ -203,7 +203,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
203 if (!root->ref_cows) 203 if (!root->ref_cows)
204 return -EINVAL; 204 return -EINVAL;
205 205
206 ret = btrfs_check_free_space(root, 1, 0); 206 ret = btrfs_check_metadata_free_space(root);
207 if (ret) 207 if (ret)
208 goto fail_unlock; 208 goto fail_unlock;
209 209
@@ -374,7 +374,7 @@ static int btrfs_defrag_file(struct file *file)
374 unsigned long i; 374 unsigned long i;
375 int ret; 375 int ret;
376 376
377 ret = btrfs_check_free_space(root, inode->i_size, 0); 377 ret = btrfs_check_data_free_space(root, inode, inode->i_size);
378 if (ret) 378 if (ret)
379 return -ENOSPC; 379 return -ENOSPC;
380 380
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 39bd4d38e889..45e59d3c7f1f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1913,6 +1913,9 @@ COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
1913/* 0x00 */ 1913/* 0x00 */
1914COMPATIBLE_IOCTL(FIBMAP) 1914COMPATIBLE_IOCTL(FIBMAP)
1915COMPATIBLE_IOCTL(FIGETBSZ) 1915COMPATIBLE_IOCTL(FIGETBSZ)
1916/* 'X' - originally XFS but some now in the VFS */
1917COMPATIBLE_IOCTL(FIFREEZE)
1918COMPATIBLE_IOCTL(FITHAW)
1916/* RAID */ 1919/* RAID */
1917COMPATIBLE_IOCTL(RAID_VERSION) 1920COMPATIBLE_IOCTL(RAID_VERSION)
1918COMPATIBLE_IOCTL(GET_ARRAY_INFO) 1921COMPATIBLE_IOCTL(GET_ARRAY_INFO)
diff --git a/fs/dcache.c b/fs/dcache.c
index 937df0fb0da5..07e2d4a44bda 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1180,7 +1180,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1180 iput(inode); 1180 iput(inode);
1181 return res; 1181 return res;
1182} 1182}
1183EXPORT_SYMBOL_GPL(d_obtain_alias); 1183EXPORT_SYMBOL(d_obtain_alias);
1184 1184
1185/** 1185/**
1186 * d_splice_alias - splice a disconnected dentry into the tree if one exists 1186 * d_splice_alias - splice a disconnected dentry into the tree if one exists
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 9a50b8052dcf..de9459b4cb94 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -609,7 +609,9 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
609 */ 609 */
610int ext4_should_retry_alloc(struct super_block *sb, int *retries) 610int ext4_should_retry_alloc(struct super_block *sb, int *retries)
611{ 611{
612 if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3) 612 if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
613 (*retries)++ > 3 ||
614 !EXT4_SB(sb)->s_journal)
613 return 0; 615 return 0;
614 616
615 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 617 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4fb86a0061d0..f18a919be70b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -715,6 +715,13 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
715 715
716 if (sbi->s_log_groups_per_flex) { 716 if (sbi->s_log_groups_per_flex) {
717 ret2 = find_group_flex(sb, dir, &group); 717 ret2 = find_group_flex(sb, dir, &group);
718 if (ret2 == -1) {
719 ret2 = find_group_other(sb, dir, &group);
720 if (ret2 == 0 && printk_ratelimit())
721 printk(KERN_NOTICE "ext4: find_group_flex "
722 "failed, fallback succeeded dir %lu\n",
723 dir->i_ino);
724 }
718 goto got_group; 725 goto got_group;
719 } 726 }
720 727
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbd2ca99d113..c7fed5b18745 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1368,6 +1368,10 @@ retry:
1368 goto out; 1368 goto out;
1369 } 1369 }
1370 1370
1371 /* We cannot recurse into the filesystem as the transaction is already
1372 * started */
1373 flags |= AOP_FLAG_NOFS;
1374
1371 page = grab_cache_page_write_begin(mapping, index, flags); 1375 page = grab_cache_page_write_begin(mapping, index, flags);
1372 if (!page) { 1376 if (!page) {
1373 ext4_journal_stop(handle); 1377 ext4_journal_stop(handle);
@@ -1377,7 +1381,7 @@ retry:
1377 *pagep = page; 1381 *pagep = page;
1378 1382
1379 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1383 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
1380 ext4_get_block); 1384 ext4_get_block);
1381 1385
1382 if (!ret && ext4_should_journal_data(inode)) { 1386 if (!ret && ext4_should_journal_data(inode)) {
1383 ret = walk_page_buffers(handle, page_buffers(page), 1387 ret = walk_page_buffers(handle, page_buffers(page),
@@ -2540,7 +2544,7 @@ retry:
2540 2544
2541 ext4_journal_stop(handle); 2545 ext4_journal_stop(handle);
2542 2546
2543 if (mpd.retval == -ENOSPC) { 2547 if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
2544 /* commit the transaction which would 2548 /* commit the transaction which would
2545 * free blocks released in the transaction 2549 * free blocks released in the transaction
2546 * and try again 2550 * and try again
@@ -2667,6 +2671,9 @@ retry:
2667 ret = PTR_ERR(handle); 2671 ret = PTR_ERR(handle);
2668 goto out; 2672 goto out;
2669 } 2673 }
2674 /* We cannot recurse into the filesystem as the transaction is already
2675 * started */
2676 flags |= AOP_FLAG_NOFS;
2670 2677
2671 page = grab_cache_page_write_begin(mapping, index, flags); 2678 page = grab_cache_page_write_begin(mapping, index, flags);
2672 if (!page) { 2679 if (!page) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a5732c58f676..39d1993cfa13 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3091,7 +3091,6 @@ static int ext4_freeze(struct super_block *sb)
3091 3091
3092 /* Journal blocked and flushed, clear needs_recovery flag. */ 3092 /* Journal blocked and flushed, clear needs_recovery flag. */
3093 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 3093 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3094 ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3095 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); 3094 error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3096 if (error) 3095 if (error)
3097 goto out; 3096 goto out;
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 3cceef4ad2b7..e9580104b6ba 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -95,13 +95,17 @@ static int jffs2_garbage_collect_thread(void *_c)
95 spin_unlock(&c->erase_completion_lock); 95 spin_unlock(&c->erase_completion_lock);
96 96
97 97
98 /* This thread is purely an optimisation. But if it runs when 98 /* Problem - immediately after bootup, the GCD spends a lot
99 other things could be running, it actually makes things a 99 * of time in places like jffs2_kill_fragtree(); so much so
100 lot worse. Use yield() and put it at the back of the runqueue 100 * that userspace processes (like gdm and X) are starved
101 every time. Especially during boot, pulling an inode in 101 * despite plenty of cond_resched()s and renicing. Yield()
102 with read_inode() is much preferable to having the GC thread 102 * doesn't help, either (presumably because userspace and GCD
103 get there first. */ 103 * are generally competing for a higher latency resource -
104 yield(); 104 * disk).
105 * This forces the GCD to slow the hell down. Pulling an
106 * inode in with read_inode() is much preferable to having
107 * the GC thread get there first. */
108 schedule_timeout_interruptible(msecs_to_jiffies(50));
105 109
106 /* Put_super will send a SIGKILL and then wait on the sem. 110 /* Put_super will send a SIGKILL and then wait on the sem.
107 */ 111 */
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 6ca08ad887c0..1fc1e92356ee 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -220,7 +220,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
220 struct jffs2_tmp_dnode_info *tn) 220 struct jffs2_tmp_dnode_info *tn)
221{ 221{
222 uint32_t fn_end = tn->fn->ofs + tn->fn->size; 222 uint32_t fn_end = tn->fn->ofs + tn->fn->size;
223 struct jffs2_tmp_dnode_info *this; 223 struct jffs2_tmp_dnode_info *this, *ptn;
224 224
225 dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw)); 225 dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw));
226 226
@@ -251,11 +251,18 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
251 if (this) { 251 if (this) {
252 /* If the node is coincident with another at a lower address, 252 /* If the node is coincident with another at a lower address,
253 back up until the other node is found. It may be relevant */ 253 back up until the other node is found. It may be relevant */
254 while (this->overlapped) 254 while (this->overlapped) {
255 this = tn_prev(this); 255 ptn = tn_prev(this);
256 256 if (!ptn) {
257 /* First node should never be marked overlapped */ 257 /*
258 BUG_ON(!this); 258 * We killed a node which set the overlapped
259 * flags during the scan. Fix it up.
260 */
261 this->overlapped = 0;
262 break;
263 }
264 this = ptn;
265 }
259 dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole"); 266 dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole");
260 } 267 }
261 268
@@ -360,7 +367,17 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
360 } 367 }
361 if (!this->overlapped) 368 if (!this->overlapped)
362 break; 369 break;
363 this = tn_prev(this); 370
371 ptn = tn_prev(this);
372 if (!ptn) {
373 /*
374 * We killed a node which set the overlapped
375 * flags during the scan. Fix it up.
376 */
377 this->overlapped = 0;
378 break;
379 }
380 this = ptn;
364 } 381 }
365 } 382 }
366 383
@@ -456,8 +473,15 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c,
456 eat_last(&rii->tn_root, &last->rb); 473 eat_last(&rii->tn_root, &last->rb);
457 ver_insert(&ver_root, last); 474 ver_insert(&ver_root, last);
458 475
459 if (unlikely(last->overlapped)) 476 if (unlikely(last->overlapped)) {
460 continue; 477 if (pen)
478 continue;
479 /*
480 * We killed a node which set the overlapped
481 * flags during the scan. Fix it up.
482 */
483 last->overlapped = 0;
484 }
461 485
462 /* Now we have a bunch of nodes in reverse version 486 /* Now we have a bunch of nodes in reverse version
463 order, in the tree at ver_root. Most of the time, 487 order, in the tree at ver_root. Most of the time,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 60fe74035db5..3a9e5deed74d 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4796,6 +4796,29 @@ out:
4796 return ret; 4796 return ret;
4797} 4797}
4798 4798
4799static int ocfs2_replace_extent_rec(struct inode *inode,
4800 handle_t *handle,
4801 struct ocfs2_path *path,
4802 struct ocfs2_extent_list *el,
4803 int split_index,
4804 struct ocfs2_extent_rec *split_rec)
4805{
4806 int ret;
4807
4808 ret = ocfs2_path_bh_journal_access(handle, inode, path,
4809 path_num_items(path) - 1);
4810 if (ret) {
4811 mlog_errno(ret);
4812 goto out;
4813 }
4814
4815 el->l_recs[split_index] = *split_rec;
4816
4817 ocfs2_journal_dirty(handle, path_leaf_bh(path));
4818out:
4819 return ret;
4820}
4821
4799/* 4822/*
4800 * Mark part or all of the extent record at split_index in the leaf 4823 * Mark part or all of the extent record at split_index in the leaf
4801 * pointed to by path as written. This removes the unwritten 4824 * pointed to by path as written. This removes the unwritten
@@ -4885,7 +4908,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4885 4908
4886 if (ctxt.c_contig_type == CONTIG_NONE) { 4909 if (ctxt.c_contig_type == CONTIG_NONE) {
4887 if (ctxt.c_split_covers_rec) 4910 if (ctxt.c_split_covers_rec)
4888 el->l_recs[split_index] = *split_rec; 4911 ret = ocfs2_replace_extent_rec(inode, handle,
4912 path, el,
4913 split_index, split_rec);
4889 else 4914 else
4890 ret = ocfs2_split_and_insert(inode, handle, path, et, 4915 ret = ocfs2_split_and_insert(inode, handle, path, et,
4891 &last_eb_bh, split_index, 4916 &last_eb_bh, split_index,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 54e182a27caf..0a2813947853 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1849,12 +1849,12 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
1849 if (!mle) { 1849 if (!mle) {
1850 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN && 1850 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
1851 res->owner != assert->node_idx) { 1851 res->owner != assert->node_idx) {
1852 mlog(ML_ERROR, "assert_master from " 1852 mlog(ML_ERROR, "DIE! Mastery assert from %u, "
1853 "%u, but current owner is " 1853 "but current owner is %u! (%.*s)\n",
1854 "%u! (%.*s)\n", 1854 assert->node_idx, res->owner, namelen,
1855 assert->node_idx, res->owner, 1855 name);
1856 namelen, name); 1856 __dlm_print_one_lock_resource(res);
1857 goto kill; 1857 BUG();
1858 } 1858 }
1859 } else if (mle->type != DLM_MLE_MIGRATION) { 1859 } else if (mle->type != DLM_MLE_MIGRATION) {
1860 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) { 1860 if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d1295203029f..4060bb328bc8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,8 +181,7 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
181 181
182 spin_lock(&res->spinlock); 182 spin_lock(&res->spinlock);
183 /* This ensures that clear refmap is sent after the set */ 183 /* This ensures that clear refmap is sent after the set */
184 __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG | 184 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
185 DLM_LOCK_RES_MIGRATING));
186 spin_unlock(&res->spinlock); 185 spin_unlock(&res->spinlock);
187 186
188 /* clear our bit from the master's refmap, ignore errors */ 187 /* clear our bit from the master's refmap, ignore errors */
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 86ca085ef324..fcf879ed6930 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -117,11 +117,11 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
117 else 117 else
118 BUG_ON(res->owner == dlm->node_num); 118 BUG_ON(res->owner == dlm->node_num);
119 119
120 spin_lock(&dlm->spinlock); 120 spin_lock(&dlm->ast_lock);
121 /* We want to be sure that we're not freeing a lock 121 /* We want to be sure that we're not freeing a lock
122 * that still has AST's pending... */ 122 * that still has AST's pending... */
123 in_use = !list_empty(&lock->ast_list); 123 in_use = !list_empty(&lock->ast_list);
124 spin_unlock(&dlm->spinlock); 124 spin_unlock(&dlm->ast_lock);
125 if (in_use) { 125 if (in_use) {
126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " 126 mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
127 "while waiting for an ast!", res->lockname.len, 127 "while waiting for an ast!", res->lockname.len,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 206a2370876a..7219a86d34cc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -320,9 +320,14 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
320 struct ocfs2_lock_res *lockres); 320 struct ocfs2_lock_res *lockres);
321static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, 321static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
322 int convert); 322 int convert);
323#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ 323#define ocfs2_log_dlm_error(_func, _err, _lockres) do { \
324 mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ 324 if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY) \
325 _err, _func, _lockres->l_name); \ 325 mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
326 _err, _func, _lockres->l_name); \
327 else \
328 mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \
329 _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \
330 (unsigned int)ocfs2_get_dentry_lock_ino(_lockres)); \
326} while (0) 331} while (0)
327static int ocfs2_downconvert_thread(void *arg); 332static int ocfs2_downconvert_thread(void *arg);
328static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, 333static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 077384135f4e..946d3c34b90b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -341,6 +341,9 @@ struct ocfs2_super
341 struct ocfs2_node_map osb_recovering_orphan_dirs; 341 struct ocfs2_node_map osb_recovering_orphan_dirs;
342 unsigned int *osb_orphan_wipes; 342 unsigned int *osb_orphan_wipes;
343 wait_queue_head_t osb_wipe_event; 343 wait_queue_head_t osb_wipe_event;
344
345 /* used to protect metaecc calculation check of xattr. */
346 spinlock_t osb_xattr_lock;
344}; 347};
345 348
346#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 349#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index b1cb38fbe807..7ac83a81ee55 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1537,6 +1537,13 @@ static int ocfs2_get_sector(struct super_block *sb,
1537 unlock_buffer(*bh); 1537 unlock_buffer(*bh);
1538 ll_rw_block(READ, 1, bh); 1538 ll_rw_block(READ, 1, bh);
1539 wait_on_buffer(*bh); 1539 wait_on_buffer(*bh);
1540 if (!buffer_uptodate(*bh)) {
1541 mlog_errno(-EIO);
1542 brelse(*bh);
1543 *bh = NULL;
1544 return -EIO;
1545 }
1546
1540 return 0; 1547 return 0;
1541} 1548}
1542 1549
@@ -1747,6 +1754,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1747 INIT_LIST_HEAD(&osb->blocked_lock_list); 1754 INIT_LIST_HEAD(&osb->blocked_lock_list);
1748 osb->blocked_lock_count = 0; 1755 osb->blocked_lock_count = 0;
1749 spin_lock_init(&osb->osb_lock); 1756 spin_lock_init(&osb->osb_lock);
1757 spin_lock_init(&osb->osb_xattr_lock);
1750 ocfs2_init_inode_steal_slot(osb); 1758 ocfs2_init_inode_steal_slot(osb);
1751 1759
1752 atomic_set(&osb->alloc_stats.moves, 0); 1760 atomic_set(&osb->alloc_stats.moves, 0);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 915039fffe6e..4ddd788add67 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -82,13 +82,14 @@ struct ocfs2_xattr_set_ctxt {
82 82
83#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 83#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
84#define OCFS2_XATTR_INLINE_SIZE 80 84#define OCFS2_XATTR_INLINE_SIZE 80
85#define OCFS2_XATTR_HEADER_GAP 4
85#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ 86#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \
86 - sizeof(struct ocfs2_xattr_header) \ 87 - sizeof(struct ocfs2_xattr_header) \
87 - sizeof(__u32)) 88 - OCFS2_XATTR_HEADER_GAP)
88#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ 89#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \
89 - sizeof(struct ocfs2_xattr_block) \ 90 - sizeof(struct ocfs2_xattr_block) \
90 - sizeof(struct ocfs2_xattr_header) \ 91 - sizeof(struct ocfs2_xattr_header) \
91 - sizeof(__u32)) 92 - OCFS2_XATTR_HEADER_GAP)
92 93
93static struct ocfs2_xattr_def_value_root def_xv = { 94static struct ocfs2_xattr_def_value_root def_xv = {
94 .xv.xr_list.l_count = cpu_to_le16(1), 95 .xv.xr_list.l_count = cpu_to_le16(1),
@@ -274,10 +275,12 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
274 bucket->bu_blocks, bucket->bu_bhs, 0, 275 bucket->bu_blocks, bucket->bu_bhs, 0,
275 NULL); 276 NULL);
276 if (!rc) { 277 if (!rc) {
278 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
277 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb, 279 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
278 bucket->bu_bhs, 280 bucket->bu_bhs,
279 bucket->bu_blocks, 281 bucket->bu_blocks,
280 &bucket_xh(bucket)->xh_check); 282 &bucket_xh(bucket)->xh_check);
283 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
281 if (rc) 284 if (rc)
282 mlog_errno(rc); 285 mlog_errno(rc);
283 } 286 }
@@ -310,9 +313,11 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
310{ 313{
311 int i; 314 int i;
312 315
316 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
313 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb, 317 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
314 bucket->bu_bhs, bucket->bu_blocks, 318 bucket->bu_bhs, bucket->bu_blocks,
315 &bucket_xh(bucket)->xh_check); 319 &bucket_xh(bucket)->xh_check);
320 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
316 321
317 for (i = 0; i < bucket->bu_blocks; i++) 322 for (i = 0; i < bucket->bu_blocks; i++)
318 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); 323 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
@@ -1507,7 +1512,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1507 last += 1; 1512 last += 1;
1508 } 1513 }
1509 1514
1510 free = min_offs - ((void *)last - xs->base) - sizeof(__u32); 1515 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1511 if (free < 0) 1516 if (free < 0)
1512 return -EIO; 1517 return -EIO;
1513 1518
@@ -2190,7 +2195,7 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2190 last += 1; 2195 last += 1;
2191 } 2196 }
2192 2197
2193 free = min_offs - ((void *)last - xs->base) - sizeof(__u32); 2198 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2194 if (free < 0) 2199 if (free < 0)
2195 return 0; 2200 return 0;
2196 2201
@@ -2592,8 +2597,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
2592 2597
2593 if (!ret) { 2598 if (!ret) {
2594 /* Update inode ctime. */ 2599 /* Update inode ctime. */
2595 ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh, 2600 ret = ocfs2_journal_access_di(ctxt->handle, inode,
2596 OCFS2_JOURNAL_ACCESS_WRITE); 2601 xis->inode_bh,
2602 OCFS2_JOURNAL_ACCESS_WRITE);
2597 if (ret) { 2603 if (ret) {
2598 mlog_errno(ret); 2604 mlog_errno(ret);
2599 goto out; 2605 goto out;
@@ -5060,8 +5066,8 @@ try_again:
5060 xh_free_start = le16_to_cpu(xh->xh_free_start); 5066 xh_free_start = le16_to_cpu(xh->xh_free_start);
5061 header_size = sizeof(struct ocfs2_xattr_header) + 5067 header_size = sizeof(struct ocfs2_xattr_header) +
5062 count * sizeof(struct ocfs2_xattr_entry); 5068 count * sizeof(struct ocfs2_xattr_entry);
5063 max_free = OCFS2_XATTR_BUCKET_SIZE - 5069 max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5064 le16_to_cpu(xh->xh_name_value_len) - header_size; 5070 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5065 5071
5066 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " 5072 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5067 "of %u which exceed block size\n", 5073 "of %u which exceed block size\n",
@@ -5094,7 +5100,7 @@ try_again:
5094 need = 0; 5100 need = 0;
5095 } 5101 }
5096 5102
5097 free = xh_free_start - header_size; 5103 free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5098 /* 5104 /*
5099 * We need to make sure the new name/value pair 5105 * We need to make sure the new name/value pair
5100 * can exist in the same block. 5106 * can exist in the same block.
@@ -5127,7 +5133,8 @@ try_again:
5127 } 5133 }
5128 5134
5129 xh_free_start = le16_to_cpu(xh->xh_free_start); 5135 xh_free_start = le16_to_cpu(xh->xh_free_start);
5130 free = xh_free_start - header_size; 5136 free = xh_free_start - header_size
5137 - OCFS2_XATTR_HEADER_GAP;
5131 if (xh_free_start % blocksize < need) 5138 if (xh_free_start % blocksize < need)
5132 free -= xh_free_start % blocksize; 5139 free -= xh_free_start % blocksize;
5133 5140
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3e76bb9b3ad6..d8bb5c671f42 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -485,8 +485,10 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
485 } 485 }
486 } 486 }
487 unlock_new_inode(inode); 487 unlock_new_inode(inode);
488 } else 488 } else {
489 module_put(de->owner); 489 module_put(de->owner);
490 de_put(de);
491 }
490 return inode; 492 return inode;
491 493
492out_ino: 494out_ino:
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 767d95a6d1b1..2d1345112a42 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -107,7 +107,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
107 else 107 else
108 kflags = ppage->flags; 108 kflags = ppage->flags;
109 109
110 uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | 110 uflags = kpf_copy_bit(kflags, KPF_LOCKED, PG_locked) |
111 kpf_copy_bit(kflags, KPF_ERROR, PG_error) | 111 kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
112 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | 112 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
113 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | 113 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |