Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/backref.c | 118
-rw-r--r--  fs/btrfs/btrfs_inode.h | 1
-rw-r--r--  fs/btrfs/check-integrity.c | 16
-rw-r--r--  fs/btrfs/ctree.c | 146
-rw-r--r--  fs/btrfs/ctree.h | 11
-rw-r--r--  fs/btrfs/delayed-inode.c | 18
-rw-r--r--  fs/btrfs/delayed-inode.h | 3
-rw-r--r--  fs/btrfs/disk-io.c | 111
-rw-r--r--  fs/btrfs/extent-tree.c | 11
-rw-r--r--  fs/btrfs/extent_io.c | 21
-rw-r--r--  fs/btrfs/file.c | 13
-rw-r--r--  fs/btrfs/free-space-cache.c | 145
-rw-r--r--  fs/btrfs/inode.c | 130
-rw-r--r--  fs/btrfs/ioctl.c | 118
-rw-r--r--  fs/btrfs/ioctl.h | 2
-rw-r--r--  fs/btrfs/ordered-data.c | 22
-rw-r--r--  fs/btrfs/rcu-string.h | 56
-rw-r--r--  fs/btrfs/scrub.c | 30
-rw-r--r--  fs/btrfs/super.c | 37
-rw-r--r--  fs/btrfs/transaction.c | 14
-rw-r--r--  fs/btrfs/tree-log.c | 6
-rw-r--r--  fs/btrfs/volumes.c | 187
-rw-r--r--  fs/btrfs/volumes.h | 5
-rw-r--r--  fs/buffer.c | 22
-rw-r--r--  fs/ceph/addr.c | 21
-rw-r--r--  fs/cifs/cifssmb.c | 30
-rw-r--r--  fs/cifs/connect.c | 59
-rw-r--r--  fs/cifs/readdir.c | 7
-rw-r--r--  fs/cifs/transport.c | 26
-rw-r--r--  fs/ecryptfs/kthread.c | 2
-rw-r--r--  fs/ecryptfs/miscdev.c | 48
-rw-r--r--  fs/eventpoll.c | 2
-rw-r--r--  fs/exec.c | 2
-rw-r--r--  fs/exofs/ore.c | 8
-rw-r--r--  fs/exofs/ore_raid.c | 91
-rw-r--r--  fs/exofs/sys.c | 2
-rw-r--r--  fs/ext4/ioctl.c | 1
-rw-r--r--  fs/fat/inode.c | 13
-rw-r--r--  fs/fifo.c | 9
-rw-r--r--  fs/fs-writeback.c | 1
-rw-r--r--  fs/hfsplus/ioctl.c | 9
-rw-r--r--  fs/hfsplus/wrapper.c | 2
-rw-r--r--  fs/locks.c | 2
-rw-r--r--  fs/nfs/callback.c | 11
-rw-r--r--  fs/nfs/callback_xdr.c | 8
-rw-r--r--  fs/nfs/client.c | 3
-rw-r--r--  fs/nfs/direct.c | 15
-rw-r--r--  fs/nfs/idmap.c | 13
-rw-r--r--  fs/nfs/inode.c | 2
-rw-r--r--  fs/nfs/nfs4_fs.h | 2
-rw-r--r--  fs/nfs/nfs4proc.c | 42
-rw-r--r--  fs/nfs/nfs4state.c | 22
-rw-r--r--  fs/nfs/nfs4xdr.c | 15
-rw-r--r--  fs/nfs/objlayout/objio_osd.c | 25
-rw-r--r--  fs/nfs/pnfs.c | 13
-rw-r--r--  fs/nfs/pnfs.h | 2
-rw-r--r--  fs/nfs/proc.c | 2
-rw-r--r--  fs/nfs/super.c | 5
-rw-r--r--  fs/nfs/write.c | 7
-rw-r--r--  fs/nfsd/nfs4state.c | 4
-rw-r--r--  fs/nilfs2/gcinode.c | 2
-rw-r--r--  fs/nilfs2/segment.c | 2
-rw-r--r--  fs/ocfs2/dlmglue.c | 33
-rw-r--r--  fs/ocfs2/extent_map.c | 2
-rw-r--r--  fs/ocfs2/file.c | 6
-rw-r--r--  fs/ocfs2/quota_global.c | 2
-rw-r--r--  fs/open.c | 6
-rw-r--r--  fs/pstore/inode.c | 2
-rw-r--r--  fs/pstore/platform.c | 34
-rw-r--r--  fs/pstore/ram.c | 3
-rw-r--r--  fs/pstore/ram_core.c | 27
-rw-r--r--  fs/ramfs/file-nommu.c | 1
-rw-r--r--  fs/splice.c | 35
-rw-r--r--  fs/ubifs/debug.c | 8
-rw-r--r--  fs/ubifs/find.c | 4
-rw-r--r--  fs/ubifs/sb.c | 8
-rw-r--r--  fs/udf/super.c | 102
-rw-r--r--  fs/xfs/xfs_alloc.c | 20
-rw-r--r--  fs/xfs/xfs_aops.c | 11
-rw-r--r--  fs/xfs/xfs_buf.c | 69
-rw-r--r--  fs/xfs/xfs_buf.h | 1
-rw-r--r--  fs/xfs/xfs_buf_item.c | 2
-rw-r--r--  fs/xfs/xfs_inode_item.c | 17
-rw-r--r--  fs/xfs/xfs_log.c | 77
-rw-r--r--  fs/xfs/xfs_log_cil.c | 22
-rw-r--r--  fs/xfs/xfs_log_priv.h | 46
-rw-r--r--  fs/xfs/xfs_log_recover.c | 38
-rw-r--r--  fs/xfs/xfs_mount.h | 4
-rw-r--r--  fs/xfs/xfs_sync.c | 32
-rw-r--r--  fs/xfs/xfs_trace.h | 18
90 files changed, 1482 insertions, 921 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3f75895c919b..a383c18e74e8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -179,60 +179,74 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                struct ulist *parents, int level,
-               struct btrfs_key *key, u64 wanted_disk_byte,
+               struct btrfs_key *key_for_search, u64 time_seq,
+               u64 wanted_disk_byte,
                const u64 *extent_item_pos)
 {
-    int ret;
-    int slot = path->slots[level];
-    struct extent_buffer *eb = path->nodes[level];
+    int ret = 0;
+    int slot;
+    struct extent_buffer *eb;
+    struct btrfs_key key;
     struct btrfs_file_extent_item *fi;
     struct extent_inode_elem *eie = NULL;
     u64 disk_byte;
-    u64 wanted_objectid = key->objectid;
 
-add_parent:
-    if (level == 0 && extent_item_pos) {
-        fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
-        ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie);
+    if (level != 0) {
+        eb = path->nodes[level];
+        ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
         if (ret < 0)
             return ret;
-    }
-    ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS);
-    if (ret < 0)
-        return ret;
-
-    if (level != 0)
         return 0;
+    }
 
     /*
-     * if the current leaf is full with EXTENT_DATA items, we must
-     * check the next one if that holds a reference as well.
-     * ref->count cannot be used to skip this check.
-     * repeat this until we don't find any additional EXTENT_DATA items.
+     * We normally enter this function with the path already pointing to
+     * the first item to check. But sometimes, we may enter it with
+     * slot==nritems. In that case, go to the next leaf before we continue.
      */
-    while (1) {
-        eie = NULL;
-        ret = btrfs_next_leaf(root, path);
-        if (ret < 0)
-            return ret;
-        if (ret)
-            return 0;
+    if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
+        ret = btrfs_next_old_leaf(root, path, time_seq);
 
+    while (!ret) {
         eb = path->nodes[0];
-        for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
-            btrfs_item_key_to_cpu(eb, key, slot);
-            if (key->objectid != wanted_objectid ||
-                key->type != BTRFS_EXTENT_DATA_KEY)
-                return 0;
-            fi = btrfs_item_ptr(eb, slot,
-                        struct btrfs_file_extent_item);
-            disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
-            if (disk_byte == wanted_disk_byte)
-                goto add_parent;
+        slot = path->slots[0];
+
+        btrfs_item_key_to_cpu(eb, &key, slot);
+
+        if (key.objectid != key_for_search->objectid ||
+            key.type != BTRFS_EXTENT_DATA_KEY)
+            break;
+
+        fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+        disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+
+        if (disk_byte == wanted_disk_byte) {
+            eie = NULL;
+            if (extent_item_pos) {
+                ret = check_extent_in_eb(&key, eb, fi,
+                            *extent_item_pos,
+                            &eie);
+                if (ret < 0)
+                    break;
+            }
+            if (!ret) {
+                ret = ulist_add(parents, eb->start,
+                        (unsigned long)eie, GFP_NOFS);
+                if (ret < 0)
+                    break;
+                if (!extent_item_pos) {
+                    ret = btrfs_next_old_leaf(root, path,
+                                time_seq);
+                    continue;
+                }
+            }
         }
+        ret = btrfs_next_old_item(root, path, time_seq);
     }
 
-    return 0;
+    if (ret > 0)
+        ret = 0;
+    return ret;
 }
 
 /*
@@ -249,7 +263,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
     struct btrfs_path *path;
     struct btrfs_root *root;
     struct btrfs_key root_key;
-    struct btrfs_key key = {0};
     struct extent_buffer *eb;
     int ret = 0;
     int root_level;
@@ -288,25 +301,19 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
         goto out;
 
     eb = path->nodes[level];
-    if (!eb) {
-        WARN_ON(1);
-        ret = 1;
-        goto out;
-    }
-
-    if (level == 0) {
-        if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
-            ret = btrfs_next_leaf(root, path);
-            if (ret)
-                goto out;
-            eb = path->nodes[0];
+    while (!eb) {
+        if (!level) {
+            WARN_ON(1);
+            ret = 1;
+            goto out;
         }
-
-        btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+        level--;
+        eb = path->nodes[level];
     }
 
-    ret = add_all_parents(root, path, parents, level, &key,
-                  ref->wanted_disk_byte, extent_item_pos);
+    ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
+                  time_seq, ref->wanted_disk_byte,
+                  extent_item_pos);
 out:
     btrfs_free_path(path);
     return ret;
@@ -832,6 +839,7 @@ again:
         }
         ret = __add_delayed_refs(head, delayed_ref_seq,
                      &prefs_delayed);
+        mutex_unlock(&head->mutex);
         if (ret) {
             spin_unlock(&delayed_refs->lock);
             goto out;
@@ -925,8 +933,6 @@ again:
     }
 
 out:
-    if (head)
-        mutex_unlock(&head->mutex);
     btrfs_free_path(path);
     while (!list_empty(&prefs)) {
         ref = list_first_entry(&prefs, struct __prelim_ref, list);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index e616f8872e69..12394a90d60f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -37,6 +37,7 @@
 #define BTRFS_INODE_IN_DEFRAG			3
 #define BTRFS_INODE_DELALLOC_META_RESERVED	4
 #define BTRFS_INODE_HAS_ORPHAN_ITEM		5
+#define BTRFS_INODE_HAS_ASYNC_EXTENT		6
 
 /* in memory btrfs inode */
 struct btrfs_inode {
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 9cebb1fd6a3c..da6e9364a5e3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -93,6 +93,7 @@
 #include "print-tree.h"
 #include "locking.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(
         superblock_tmp->never_written = 0;
         superblock_tmp->mirror_num = 1 + superblock_mirror_num;
         if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
-            printk(KERN_INFO "New initial S-block (bdev %p, %s)"
-                   " @%llu (%s/%llu/%d)\n",
-                   superblock_bdev, device->name,
-                   (unsigned long long)dev_bytenr,
-                   dev_state->name,
-                   (unsigned long long)dev_bytenr,
-                   superblock_mirror_num);
+            printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
+                      " @%llu (%s/%llu/%d)\n",
+                      superblock_bdev,
+                      rcu_str_deref(device->name),
+                      (unsigned long long)dev_bytenr,
+                      dev_state->name,
+                      (unsigned long long)dev_bytenr,
+                      superblock_mirror_num);
         list_add(&superblock_tmp->all_blocks_node,
              &state->all_blocks_list);
         btrfsic_block_hashtable_add(superblock_tmp,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d7a96cfdc50a..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
     return 0;
 }
 
+/*
+ * This allocates memory and gets a tree modification sequence number when
+ * needed.
+ *
+ * Returns 0 when no sequence number is needed, < 0 on error.
+ * Returns 1 when a sequence number was added. In this case,
+ * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
+ * after inserting into the rb tree.
+ */
 static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
                  struct tree_mod_elem **tm_ret)
 {
@@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
          */
         kfree(tm);
         seq = 0;
+        spin_unlock(&fs_info->tree_mod_seq_lock);
     } else {
         __get_tree_mod_seq(fs_info, &tm->elem);
         seq = tm->elem.seq;
     }
-    spin_unlock(&fs_info->tree_mod_seq_lock);
 
     return seq;
 }
@@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
     tm->slot = slot;
     tm->generation = btrfs_node_ptr_generation(eb, slot);
 
-    return __tree_mod_log_insert(fs_info, tm);
+    ret = __tree_mod_log_insert(fs_info, tm);
+    spin_unlock(&fs_info->tree_mod_seq_lock);
+    return ret;
 }
 
 static noinline int
@@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
     tm->move.nr_items = nr_items;
     tm->op = MOD_LOG_MOVE_KEYS;
 
-    return __tree_mod_log_insert(fs_info, tm);
+    ret = __tree_mod_log_insert(fs_info, tm);
+    spin_unlock(&fs_info->tree_mod_seq_lock);
+    return ret;
 }
 
 static noinline int
@@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
     tm->generation = btrfs_header_generation(old_root);
     tm->op = MOD_LOG_ROOT_REPLACE;
 
-    return __tree_mod_log_insert(fs_info, tm);
+    ret = __tree_mod_log_insert(fs_info, tm);
+    spin_unlock(&fs_info->tree_mod_seq_lock);
+    return ret;
 }
 
 static struct tree_mod_elem *
@@ -1009,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
         if (!looped && !tm)
             return 0;
         /*
-         * we must have key remove operations in the log before the
-         * replace operation.
+         * if there are no tree operation for the oldest root, we simply
+         * return it. this should only happen if that (old) root is at
+         * level 0.
          */
-        BUG_ON(!tm);
+        if (!tm)
+            break;
 
+        /*
+         * if there's an operation that's not a root replacement, we
+         * found the oldest version of our root. normally, we'll find a
+         * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+         */
         if (tm->op != MOD_LOG_ROOT_REPLACE)
             break;
 
@@ -1023,6 +1045,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
         looped = 1;
     }
 
+    /* if there's no old root to return, return what we found instead */
+    if (!found)
+        found = tm;
+
     return found;
 }
 
@@ -1068,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
                           tm->generation);
             break;
         case MOD_LOG_KEY_ADD:
-            if (tm->slot != n - 1) {
-                o_dst = btrfs_node_key_ptr_offset(tm->slot);
-                o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
-                memmove_extent_buffer(eb, o_dst, o_src, p_size);
-            }
+            /* if a move operation is needed it's in the log */
             n--;
             break;
         case MOD_LOG_MOVE_KEYS:
@@ -1143,45 +1165,57 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
     return eb_rewin;
 }
 
+/*
+ * get_old_root() rewinds the state of @root's root node to the given @time_seq
+ * value. If there are no changes, the current root->root_node is returned. If
+ * anything changed in between, there's a fresh buffer allocated on which the
+ * rewind operations are done. In any case, the returned buffer is read locked.
+ * Returns NULL on error (with no locks held).
+ */
 static inline struct extent_buffer *
 get_old_root(struct btrfs_root *root, u64 time_seq)
 {
     struct tree_mod_elem *tm;
     struct extent_buffer *eb;
-    struct tree_mod_root *old_root;
-    u64 old_generation;
+    struct tree_mod_root *old_root = NULL;
+    u64 old_generation = 0;
+    u64 logical;
 
+    eb = btrfs_read_lock_root_node(root);
     tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
     if (!tm)
         return root->node;
 
-    old_root = &tm->old_root;
-    old_generation = tm->generation;
-
-    tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq);
-    /*
-     * there was an item in the log when __tree_mod_log_oldest_root
-     * returned. this one must not go away, because the time_seq passed to
-     * us must be blocking its removal.
-     */
-    BUG_ON(!tm);
+    if (tm->op == MOD_LOG_ROOT_REPLACE) {
+        old_root = &tm->old_root;
+        old_generation = tm->generation;
+        logical = old_root->logical;
+    } else {
+        logical = root->node->start;
+    }
 
-    if (old_root->logical == root->node->start) {
-        /* there are logged operations for the current root */
+    tm = tree_mod_log_search(root->fs_info, logical, time_seq);
+    if (old_root)
+        eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+    else
         eb = btrfs_clone_extent_buffer(root->node);
-    } else {
-        /* there's a root replace operation for the current root */
-        eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
-                           root->nodesize);
+    btrfs_tree_read_unlock(root->node);
+    free_extent_buffer(root->node);
+    if (!eb)
+        return NULL;
+    btrfs_tree_read_lock(eb);
+    if (old_root) {
         btrfs_set_header_bytenr(eb, eb->start);
         btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
         btrfs_set_header_owner(eb, root->root_key.objectid);
+        btrfs_set_header_level(eb, old_root->level);
+        btrfs_set_header_generation(eb, old_generation);
     }
-    if (!eb)
-        return NULL;
-    btrfs_set_header_level(eb, old_root->level);
-    btrfs_set_header_generation(eb, old_generation);
-    __tree_mod_log_rewind(eb, time_seq, tm);
+    if (tm)
+        __tree_mod_log_rewind(eb, time_seq, tm);
+    else
+        WARN_ON(btrfs_header_level(eb) != 0);
+    extent_buffer_get(eb);
 
     return eb;
 }
@@ -1650,8 +1684,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
         BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
         return 0;
 
-    btrfs_header_nritems(mid);
-
     left = read_node_slot(root, parent, pslot - 1);
     if (left) {
         btrfs_tree_lock(left);
@@ -1681,7 +1713,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
         wret = push_node_left(trans, root, left, mid, 1);
         if (wret < 0)
             ret = wret;
-        btrfs_header_nritems(mid);
     }
 
     /*
@@ -2615,9 +2646,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
 
 again:
     b = get_old_root(root, time_seq);
-    extent_buffer_get(b);
     level = btrfs_header_level(b);
-    btrfs_tree_read_lock(b);
     p->locks[level] = BTRFS_READ_LOCK;
 
     while (b) {
@@ -2964,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 static void insert_ptr(struct btrfs_trans_handle *trans,
                struct btrfs_root *root, struct btrfs_path *path,
                struct btrfs_disk_key *key, u64 bytenr,
-               int slot, int level, int tree_mod_log)
+               int slot, int level)
 {
     struct extent_buffer *lower;
     int nritems;
@@ -2977,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
     BUG_ON(slot > nritems);
     BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
     if (slot != nritems) {
-        if (tree_mod_log && level)
+        if (level)
             tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
                          slot, nritems - slot);
         memmove_extent_buffer(lower,
@@ -2985,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
                   btrfs_node_key_ptr_offset(slot),
                   (nritems - slot) * sizeof(struct btrfs_key_ptr));
     }
-    if (tree_mod_log && level) {
+    if (level) {
         ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
                           MOD_LOG_KEY_ADD);
         BUG_ON(ret < 0);
@@ -3073,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
     btrfs_mark_buffer_dirty(split);
 
     insert_ptr(trans, root, path, &disk_key, split->start,
-           path->slots[level + 1] + 1, level + 1, 1);
+           path->slots[level + 1] + 1, level + 1);
 
     if (path->slots[level] >= mid) {
         path->slots[level] -= mid;
@@ -3610,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
     btrfs_set_header_nritems(l, mid);
     btrfs_item_key(right, &disk_key, 0);
     insert_ptr(trans, root, path, &disk_key, right->start,
-           path->slots[1] + 1, 1, 0);
+           path->slots[1] + 1, 1);
 
     btrfs_mark_buffer_dirty(right);
     btrfs_mark_buffer_dirty(l);
@@ -3817,7 +3846,7 @@ again:
     if (mid <= slot) {
         btrfs_set_header_nritems(right, 0);
         insert_ptr(trans, root, path, &disk_key, right->start,
-               path->slots[1] + 1, 1, 0);
+               path->slots[1] + 1, 1);
         btrfs_tree_unlock(path->nodes[0]);
         free_extent_buffer(path->nodes[0]);
         path->nodes[0] = right;
@@ -3826,7 +3855,7 @@ again:
     } else {
         btrfs_set_header_nritems(right, 0);
         insert_ptr(trans, root, path, &disk_key, right->start,
-               path->slots[1], 1, 0);
+               path->slots[1], 1);
         btrfs_tree_unlock(path->nodes[0]);
         free_extent_buffer(path->nodes[0]);
         path->nodes[0] = right;
@@ -5001,6 +5030,12 @@ next:
  */
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 {
+    return btrfs_next_old_leaf(root, path, 0);
+}
+
+int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+            u64 time_seq)
+{
     int slot;
     int level;
     struct extent_buffer *c;
@@ -5025,7 +5060,10 @@ again:
     path->keep_locks = 1;
     path->leave_spinning = 1;
 
-    ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+    if (time_seq)
+        ret = btrfs_search_old_slot(root, &key, path, time_seq);
+    else
+        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
     path->keep_locks = 0;
 
     if (ret < 0)
@@ -5081,6 +5119,18 @@ again:
 
         if (!path->skip_locking) {
             ret = btrfs_try_tree_read_lock(next);
+            if (!ret && time_seq) {
+                /*
+                 * If we don't get the lock, we may be racing
+                 * with push_leaf_left, holding that lock while
+                 * itself waiting for the leaf we've currently
+                 * locked. To solve this situation, we give up
+                 * on our lock and cycle.
+                 */
+                btrfs_release_path(path);
+                cond_resched();
+                goto again;
+            }
             if (!ret) {
                 btrfs_set_path_blocking(path);
                 btrfs_tree_read_lock(next);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0236d03c6732..fa5c45b39075 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2753,13 +2753,20 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
-static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+            u64 time_seq);
+static inline int btrfs_next_old_item(struct btrfs_root *root,
+                      struct btrfs_path *p, u64 time_seq)
 {
     ++p->slots[0];
     if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
-        return btrfs_next_leaf(root, p);
+        return btrfs_next_old_leaf(root, p, time_seq);
     return 0;
 }
+static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+{
+    return btrfs_next_old_item(root, p, 0);
+}
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
 int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c18d0442ae6d..2399f4086915 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
         }
     }
 }
+
+void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
+{
+    struct btrfs_delayed_root *delayed_root;
+    struct btrfs_delayed_node *curr_node, *prev_node;
+
+    delayed_root = btrfs_get_delayed_root(root);
+
+    curr_node = btrfs_first_delayed_node(delayed_root);
+    while (curr_node) {
+        __btrfs_kill_delayed_node(curr_node);
+
+        prev_node = curr_node;
+        curr_node = btrfs_next_delayed_node(curr_node);
+        btrfs_release_delayed_node(prev_node);
+    }
+}
+
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 7083d08b2a21..f5aa4023d3e1 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev);
 /* Used for drop dead root */
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
 
+/* Used for clean the transaction */
+void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
+
 /* Used for readdir() */
 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
                  struct list_head *del_list);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7ae51decf6d3..2936ca49b3b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,7 @@
 #include "free-space-cache.h"
 #include "inode-map.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,
 
     features = btrfs_super_incompat_flags(disk_super);
     features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
-    if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+    if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
         features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
     /*
@@ -2353,12 +2354,17 @@ retry_root_backup:
                       BTRFS_CSUM_TREE_OBJECTID, csum_root);
     if (ret)
         goto recovery_tree_root;
-
     csum_root->track_dirty = 1;
 
     fs_info->generation = generation;
     fs_info->last_trans_committed = generation;
 
+    ret = btrfs_recover_balance(fs_info);
+    if (ret) {
+        printk(KERN_WARNING "btrfs: failed to recover balance\n");
+        goto fail_block_groups;
+    }
+
     ret = btrfs_init_dev_stats(fs_info);
     if (ret) {
         printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2484,20 +2490,23 @@ retry_root_backup:
         goto fail_trans_kthread;
     }
 
-    if (!(sb->s_flags & MS_RDONLY)) {
-        down_read(&fs_info->cleanup_work_sem);
-        err = btrfs_orphan_cleanup(fs_info->fs_root);
-        if (!err)
-            err = btrfs_orphan_cleanup(fs_info->tree_root);
-        up_read(&fs_info->cleanup_work_sem);
+    if (sb->s_flags & MS_RDONLY)
+        return 0;
 
-        if (!err)
-            err = btrfs_recover_balance(fs_info->tree_root);
+    down_read(&fs_info->cleanup_work_sem);
+    if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+        (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
+        up_read(&fs_info->cleanup_work_sem);
+        close_ctree(tree_root);
+        return ret;
+    }
+    up_read(&fs_info->cleanup_work_sem);
 
-        if (err) {
-            close_ctree(tree_root);
-            return err;
-        }
+    ret = btrfs_resume_balance_async(fs_info);
+    if (ret) {
+        printk(KERN_WARNING "btrfs: failed to resume balance\n");
+        close_ctree(tree_root);
+        return ret;
     }
 
     return 0;
@@ -2575,8 +2584,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
         struct btrfs_device *device = (struct btrfs_device *)
                             bh->b_private;
 
-        printk_ratelimited(KERN_WARNING "lost page write due to "
-                   "I/O error on %s\n", device->name);
+        printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
+                      "I/O error on %s\n",
+                      rcu_str_deref(device->name));
         /* note, we dont' set_buffer_write_io_error because we have
          * our own ways of dealing with the IO errors
          */
@@ -2749,8 +2759,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
     wait_for_completion(&device->flush_wait);
 
     if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-        printk("btrfs: disabling barriers on dev %s\n",
-               device->name);
+        printk_in_rcu("btrfs: disabling barriers on dev %s\n",
+                  rcu_str_deref(device->name));
         device->nobarriers = 1;
     }
     if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3400,7 +3410,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
     delayed_refs = &trans->delayed_refs;
 
-again:
     spin_lock(&delayed_refs->lock);
     if (delayed_refs->num_entries == 0) {
         spin_unlock(&delayed_refs->lock);
@@ -3408,31 +3417,37 @@ again:
         return ret;
     }
 
-    node = rb_first(&delayed_refs->root);
-    while (node) {
+    while ((node = rb_first(&delayed_refs->root)) != NULL) {
         ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-        node = rb_next(node);
-
-        ref->in_tree = 0;
-        rb_erase(&ref->rb_node, &delayed_refs->root);
-        delayed_refs->num_entries--;
 
         atomic_set(&ref->refs, 1);
         if (btrfs_delayed_ref_is_head(ref)) {
             struct btrfs_delayed_ref_head *head;
 
             head = btrfs_delayed_node_to_head(ref);
-            spin_unlock(&delayed_refs->lock);
-            mutex_lock(&head->mutex);
+            if (!mutex_trylock(&head->mutex)) {
+                atomic_inc(&ref->refs);
+                spin_unlock(&delayed_refs->lock);
+
+                /* Need to wait for the delayed ref to run */
+                mutex_lock(&head->mutex);
+                mutex_unlock(&head->mutex);
+                btrfs_put_delayed_ref(ref);
+
+                spin_lock(&delayed_refs->lock);
+                continue;
+            }
+
             kfree(head->extent_op);
             delayed_refs->num_heads--;
             if (list_empty(&head->cluster))
                 delayed_refs->num_heads_ready--;
             list_del_init(&head->cluster);
-            mutex_unlock(&head->mutex);
-            btrfs_put_delayed_ref(ref);
-            goto again;
         }
+        ref->in_tree = 0;
+        rb_erase(&ref->rb_node, &delayed_refs->root);
+        delayed_refs->num_entries--;
+
         spin_unlock(&delayed_refs->lock);
         btrfs_put_delayed_ref(ref);
 
@@ -3520,11 +3535,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                     &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
                     offset >> PAGE_CACHE_SHIFT);
             spin_unlock(&dirty_pages->buffer_lock);
-            if (eb) {
+            if (eb)
                 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
                              &eb->bflags);
-                atomic_set(&eb->refs, 1);
-            }
             if (PageWriteback(page))
                 end_page_writeback(page);
 
@@ -3538,8 +3551,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                 spin_unlock_irq(&page->mapping->tree_lock);
             }
 
-            page->mapping->a_ops->invalidatepage(page, 0);
             unlock_page(page);
+            page_cache_release(page);
         }
     }
 
@@ -3553,8 +3566,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
     u64 start;
     u64 end;
     int ret;
+    bool loop = true;
 
     unpin = pinned_extents;
+again:
     while (1) {
         ret = find_first_extent_bit(unpin, 0, &start, &end,
                         EXTENT_DIRTY);
@@ -3572,6 +3587,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
         cond_resched();
     }
 
+    if (loop) {
+        if (unpin == &root->fs_info->freed_extents[0])
+            unpin = &root->fs_info->freed_extents[1];
+        else
+            unpin = &root->fs_info->freed_extents[0];
+        loop = false;
+        goto again;
+    }
+
     return 0;
 }
 
@@ -3585,21 +3609,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
     /* FIXME: cleanup wait for commit */
     cur_trans->in_commit = 1;
     cur_trans->blocked = 1;
-    if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
-        wake_up(&root->fs_info->transaction_blocked_wait);
+    wake_up(&root->fs_info->transaction_blocked_wait);
 
     cur_trans->blocked = 0;
-    if (waitqueue_active(&root->fs_info->transaction_wait))
-        wake_up(&root->fs_info->transaction_wait);
+    wake_up(&root->fs_info->transaction_wait);
 
     cur_trans->commit_done = 1;
-    if (waitqueue_active(&cur_trans->commit_wait))
-        wake_up(&cur_trans->commit_wait);
+    wake_up(&cur_trans->commit_wait);
+
+    btrfs_destroy_delayed_inodes(root);
+    btrfs_assert_delayed_root_empty(root);
 
     btrfs_destroy_pending_snapshots(cur_trans);
 
     btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
                      EXTENT_DIRTY);
+    btrfs_destroy_pinned_extent(root,
+                    root->fs_info->pinned_extents);
 
     /*
     memset(cur_trans, 0, sizeof(*cur_trans));
@@ -3648,6 +3674,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
         if (waitqueue_active(&t->commit_wait))
             wake_up(&t->commit_wait);
 
+        btrfs_destroy_delayed_inodes(root);
+        btrfs_assert_delayed_root_empty(root);
+
         btrfs_destroy_pending_snapshots(t);
 
         btrfs_destroy_delalloc_inodes(root);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefb..6e1d36702ff7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
     return count;
 }
 
-
 static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
-                   unsigned long num_refs)
+                   unsigned long num_refs,
+                   struct list_head *first_seq)
 {
-    struct list_head *first_seq = delayed_refs->seq_head.next;
-
     spin_unlock(&delayed_refs->lock);
     pr_debug("waiting for more refs (num %ld, first %p)\n",
          num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
     struct btrfs_delayed_ref_root *delayed_refs;
     struct btrfs_delayed_ref_node *ref;
     struct list_head cluster;
+    struct list_head *first_seq = NULL;
     int ret;
     u64 delayed_start;
     int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
              */
             consider_waiting = 1;
             num_refs = delayed_refs->num_entries;
+            first_seq = root->fs_info->tree_mod_seq_list.next;
         } else {
-            wait_for_more_refs(delayed_refs, num_refs);
+            wait_for_more_refs(delayed_refs,
+                       num_refs, first_seq);
             /*
              * after waiting, things have changed. we
              * dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2c8f7b204617..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,6 +20,7 @@
 #include "volumes.h"
 #include "check-integrity.h"
 #include "locking.h"
+#include "rcu-string.h"
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
         return -EIO;
     }
 
-    printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
-           "sector %llu)\n", page->mapping->host->i_ino, start,
-           dev->name, sector);
+    printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
+              "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+              start, rcu_str_deref(dev->name), sector);
 
     bio_put(bio);
     return 0;
@@ -3323,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                       writepage_t writepage, void *data,
                       void (*flush_fn)(void *))
 {
+    struct inode *inode = mapping->host;
     int ret = 0;
     int done = 0;
     int nr_to_write_done = 0;
@@ -3333,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
     int scanned = 0;
     int tag;
 
+    /*
+     * We have to hold onto the inode so that ordered extents can do their
+     * work when the IO finishes. The alternative to this is failing to add
+     * an ordered extent if the igrab() fails there and that is a huge pain
+     * to deal with, so instead just hold onto the inode throughout the
+     * writepages operation. If it fails here we are freeing up the inode
+     * anyway and we'd rather not waste our time writing out stuff that is
+     * going to be truncated anyway.
+     */
+    if (!igrab(inode))
+        return 0;
+
     pagevec_init(&pvec, 0);
     if (wbc->range_cyclic) {
         index = mapping->writeback_index; /* Start from prev offset */
@@ -3427,6 +3441,7 @@ retry:
         index = 0;
         goto retry;
     }
+    btrfs_add_delayed_iput(inode);
     return ret;
 }
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e25..9aa01ec2138d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
                     loff_t *ppos, size_t count, size_t ocount)
 {
     struct file *file = iocb->ki_filp;
-    struct inode *inode = fdentry(file)->d_inode;
     struct iov_iter i;
     ssize_t written;
     ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
     written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
                         count, ocount);
 
-    /*
-     * the generic O_DIRECT will update in-memory i_size after the
-     * DIOs are done. But our endio handlers that update the on
-     * disk i_size never update past the in memory i_size. So we
-     * need one more update here to catch any additions to the
-     * file
-     */
-    if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-        mark_inode_dirty(inode);
-    }
-
     if (written < 0 || written == count)
         return written;
 
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a..6c4e2baa9290 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
     end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
 
     /*
-     * XXX - this can go away after a few releases.
-     *
-     * since the only user of btrfs_remove_free_space is the tree logging
-     * stuff, and the only way to test that is under crash conditions, we
-     * want to have this debug stuff here just in case somethings not
-     * working. Search the bitmap for the space we are trying to use to
-     * make sure its actually there. If its not there then we need to stop
-     * because something has gone wrong.
+     * We need to search for bits in this bitmap. We could only cover some
+     * of the extent in this bitmap thanks to how we add space, so we need
+     * to search for as much as it as we can and clear that amount, and then
+     * go searching for the next bit.
      */
     search_start = *offset;
-    search_bytes = *bytes;
+    search_bytes = ctl->unit;
     search_bytes = min(search_bytes, end - search_start + 1);
     ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
     BUG_ON(ret < 0 || search_start != *offset);
 
-    if (*offset > bitmap_info->offset && *offset + *bytes > end) {
-        bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
-        *bytes -= end - *offset + 1;
-        *offset = end + 1;
-    } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
-        bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
-        *bytes = 0;
-    }
+    /* We may have found more bits than what we need */
+    search_bytes = min(search_bytes, *bytes);
+
+    /* Cannot clear past the end of the bitmap */
+    search_bytes = min(search_bytes, end - search_start + 1);
+
+    bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
+    *offset += search_bytes;
+    *bytes -= search_bytes;
 
     if (*bytes) {
         struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
          * everything over again.
          */
         search_start = *offset;
-        search_bytes = *bytes;
+        search_bytes = ctl->unit;
         ret = search_bitmap(ctl, bitmap_info, &search_start,
                     &search_bytes);
         if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 {
     struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
     struct btrfs_free_space *info;
-    struct btrfs_free_space *next_info = NULL;
     int ret = 0;
 
     spin_lock(&ctl->tree_lock);
 
 again:
+    if (!bytes)
+        goto out_lock;
+
     info = tree_search_offset(ctl, offset, 0, 0);
     if (!info) {
         /*
@@ -1905,88 +1904,48 @@ again:
         }
     }
 
-    if (info->bytes < bytes && rb_next(&info->offset_index)) {
-        u64 end;
-        next_info = rb_entry(rb_next(&info->offset_index),
-                     struct btrfs_free_space,
-                     offset_index);
-
-        if (next_info->bitmap)
-            end = next_info->offset +
-                  BITS_PER_BITMAP * ctl->unit - 1;
-        else
-            end = next_info->offset + next_info->bytes;
-
-        if (next_info->bytes < bytes ||
-            next_info->offset > offset || offset > end) {
-            printk(KERN_CRIT "Found free space at %llu, size %llu,"
-                  " trying to use %llu\n",
-                  (unsigned long long)info->offset,
-                  (unsigned long long)info->bytes,
-                  (unsigned long long)bytes);
-            WARN_ON(1);
-            ret = -EINVAL;
-            goto out_lock;
-        }
-
-        info = next_info;
-    }
-
-    if (info->bytes == bytes) {
+    if (!info->bitmap) {
         unlink_free_space(ctl, info);
-        if (info->bitmap) {
-            kfree(info->bitmap);
-            ctl->total_bitmaps--;
-        }
-        kmem_cache_free(btrfs_free_space_cachep, info);
-        ret = 0;
-        goto out_lock;
-    }
-
-    if (!info->bitmap && info->offset == offset) {
-        unlink_free_space(ctl, info);
-        info->offset += bytes;
-        info->bytes -= bytes;
-        ret = link_free_space(ctl, info);
-        WARN_ON(ret);
-        goto out_lock;
-    }
+        if (offset == info->offset) {
+            u64 to_free = min(bytes, info->bytes);
+
+            info->bytes -= to_free;
+            info->offset += to_free;
+            if (info->bytes) {
+                ret = link_free_space(ctl, info);
+                WARN_ON(ret);
+            } else {
+                kmem_cache_free(btrfs_free_space_cachep, info);
+            }
 
-    if (!info->bitmap && info->offset <= offset &&
-        info->offset + info->bytes >= offset + bytes) {
-        u64 old_start = info->offset;
-        /*
-         * we're freeing space in the middle of the info,
-         * this can happen during tree log replay
-         *
-         * first unlink the old info and then
-         * insert it again after the hole we're creating
-         */
-        unlink_free_space(ctl, info);
-        if (offset + bytes < info->offset + info->bytes) {
-            u64 old_end = info->offset + info->bytes;
+            offset += to_free;
+            bytes -= to_free;
+            goto again;
+        } else {
+            u64 old_end = info->bytes + info->offset;
 
-            info->offset = offset + bytes;
-            info->bytes = old_end - info->offset;
+            info->bytes = offset - info->offset;
             ret = link_free_space(ctl, info);
             WARN_ON(ret);
             if (ret)
                 goto out_lock;
-        } else {
-            /* the hole we're creating ends at the end
-             * of the info struct, just free the info
-             */
-            kmem_cache_free(btrfs_free_space_cachep, info);
-        }
-        spin_unlock(&ctl->tree_lock);
-
-        /* step two, insert a new info struct to cover
-         * anything before the hole
-         */
-        ret = btrfs_add_free_space(block_group, old_start,
-                       offset - old_start);
-        WARN_ON(ret); /* -ENOMEM */
-        goto out;
+
+            /* Not enough bytes in this entry to satisfy us */
+            if (old_end < offset + bytes) {
+                bytes -= old_end - offset;
+                offset = old_end;
+                goto again;
+            } else if (old_end == offset + bytes) {
+                /* all done */
+                goto out_lock;
+            }
+            spin_unlock(&ctl->tree_lock);
+
+            ret = btrfs_add_free_space(block_group, offset + bytes,
+                           old_end - (offset + bytes));
+            WARN_ON(ret);
+            goto out;
+        }
     }
 
     ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1992 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1951 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f6ab6f5e635a..a7d1921ac76b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,
     if (IS_ERR(trans)) {
         extent_clear_unlock_delalloc(inode,
                  &BTRFS_I(inode)->io_tree,
-                 start, end, NULL,
+                 start, end, locked_page,
                  EXTENT_CLEAR_UNLOCK_PAGE |
                  EXTENT_CLEAR_UNLOCK |
                  EXTENT_CLEAR_DELALLOC |
@@ -963,7 +963,7 @@ out:
 out_unlock:
     extent_clear_unlock_delalloc(inode,
              &BTRFS_I(inode)->io_tree,
-             start, end, NULL,
+             start, end, locked_page,
              EXTENT_CLEAR_UNLOCK_PAGE |
              EXTENT_CLEAR_UNLOCK |
              EXTENT_CLEAR_DELALLOC |
@@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
     compress_file_range(async_cow->inode, async_cow->locked_page,
                 async_cow->start, async_cow->end, async_cow,
                 &num_added);
-    if (num_added == 0)
+    if (num_added == 0) {
+        btrfs_add_delayed_iput(async_cow->inode);
         async_cow->inode = NULL;
+    }
 }
 
 /*
@@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
 {
     struct async_cow *async_cow;
     async_cow = container_of(work, struct async_cow, work);
+    if (async_cow->inode)
+        btrfs_add_delayed_iput(async_cow->inode);
     kfree(async_cow);
 }
 
@@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
     while (start < end) {
         async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
         BUG_ON(!async_cow); /* -ENOMEM */
-        async_cow->inode = inode;
+        async_cow->inode = igrab(inode);
         async_cow->root = root;
         async_cow->locked_page = locked_page;
         async_cow->start = start;
@@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
     u64 ino = btrfs_ino(inode);
 
     path = btrfs_alloc_path();
-    if (!path)
+    if (!path) {
+        extent_clear_unlock_delalloc(inode,
+                 &BTRFS_I(inode)->io_tree,
+                 start, end, locked_page,
+                 EXTENT_CLEAR_UNLOCK_PAGE |
+                 EXTENT_CLEAR_UNLOCK |
+                 EXTENT_CLEAR_DELALLOC |
+                 EXTENT_CLEAR_DIRTY |
+                 EXTENT_SET_WRITEBACK |
+                 EXTENT_END_WRITEBACK);
         return -ENOMEM;
+    }
 
     nolock = btrfs_is_free_space_inode(root, inode);
 
@@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,
         trans = btrfs_join_transaction(root);
 
     if (IS_ERR(trans)) {
+        extent_clear_unlock_delalloc(inode,
+                 &BTRFS_I(inode)->io_tree,
+                 start, end, locked_page,
+                 EXTENT_CLEAR_UNLOCK_PAGE |
+                 EXTENT_CLEAR_UNLOCK |
+                 EXTENT_CLEAR_DELALLOC |
+                 EXTENT_CLEAR_DIRTY |
+                 EXTENT_SET_WRITEBACK |
+                 EXTENT_END_WRITEBACK);
         btrfs_free_path(path);
         return PTR_ERR(trans);
     }
@@ -1327,8 +1350,11 @@ out_check:
1327 } 1350 }
1328 btrfs_release_path(path); 1351 btrfs_release_path(path);
1329 1352
1330 if (cur_offset <= end && cow_start == (u64)-1) 1353 if (cur_offset <= end && cow_start == (u64)-1) {
1331 cow_start = cur_offset; 1354 cow_start = cur_offset;
1355 cur_offset = end;
1356 }
1357
1332 if (cow_start != (u64)-1) { 1358 if (cow_start != (u64)-1) {
1333 ret = cow_file_range(inode, locked_page, cow_start, end, 1359 ret = cow_file_range(inode, locked_page, cow_start, end,
1334 page_started, nr_written, 1); 1360 page_started, nr_written, 1);
@@ -1347,6 +1373,17 @@ error:
1347 if (!ret) 1373 if (!ret)
1348 ret = err; 1374 ret = err;
1349 1375
1376 if (ret && cur_offset < end)
1377 extent_clear_unlock_delalloc(inode,
1378 &BTRFS_I(inode)->io_tree,
1379 cur_offset, end, locked_page,
1380 EXTENT_CLEAR_UNLOCK_PAGE |
1381 EXTENT_CLEAR_UNLOCK |
1382 EXTENT_CLEAR_DELALLOC |
1383 EXTENT_CLEAR_DIRTY |
1384 EXTENT_SET_WRITEBACK |
1385 EXTENT_END_WRITEBACK);
1386
1350 btrfs_free_path(path); 1387 btrfs_free_path(path);
1351 return ret; 1388 return ret;
1352} 1389}
@@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1361 int ret; 1398 int ret;
1362 struct btrfs_root *root = BTRFS_I(inode)->root; 1399 struct btrfs_root *root = BTRFS_I(inode)->root;
1363 1400
1364 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) 1401 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
1365 ret = run_delalloc_nocow(inode, locked_page, start, end, 1402 ret = run_delalloc_nocow(inode, locked_page, start, end,
1366 page_started, 1, nr_written); 1403 page_started, 1, nr_written);
1367 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1404 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
1368 ret = run_delalloc_nocow(inode, locked_page, start, end, 1405 ret = run_delalloc_nocow(inode, locked_page, start, end,
1369 page_started, 0, nr_written); 1406 page_started, 0, nr_written);
1370 else if (!btrfs_test_opt(root, COMPRESS) && 1407 } else if (!btrfs_test_opt(root, COMPRESS) &&
1371 !(BTRFS_I(inode)->force_compress) && 1408 !(BTRFS_I(inode)->force_compress) &&
1372 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) 1409 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
1373 ret = cow_file_range(inode, locked_page, start, end, 1410 ret = cow_file_range(inode, locked_page, start, end,
1374 page_started, nr_written, 1); 1411 page_started, nr_written, 1);
1375 else 1412 } else {
1413 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1414 &BTRFS_I(inode)->runtime_flags);
1376 ret = cow_file_range_async(inode, locked_page, start, end, 1415 ret = cow_file_range_async(inode, locked_page, start, end,
1377 page_started, nr_written); 1416 page_started, nr_written);
1417 }
1378 return ret; 1418 return ret;
1379} 1419}
1380 1420
@@ -3714,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
3714 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3754 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3715 3755
3716 if (root->fs_info->log_root_recovering) { 3756 if (root->fs_info->log_root_recovering) {
3717 BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 3757 BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3718 &BTRFS_I(inode)->runtime_flags)); 3758 &BTRFS_I(inode)->runtime_flags));
3719 goto no_delete; 3759 goto no_delete;
3720 } 3760 }
@@ -5836,8 +5876,17 @@ map:
5836 bh_result->b_size = len; 5876 bh_result->b_size = len;
5837 bh_result->b_bdev = em->bdev; 5877 bh_result->b_bdev = em->bdev;
5838 set_buffer_mapped(bh_result); 5878 set_buffer_mapped(bh_result);
5839 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 5879 if (create) {
5840 set_buffer_new(bh_result); 5880 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5881 set_buffer_new(bh_result);
5882
5883 /*
5884 * Need to update the i_size under the extent lock so buffered
5885 * readers will get the updated i_size when we unlock.
5886 */
5887 if (start + len > i_size_read(inode))
5888 i_size_write(inode, start + len);
5889 }
5841 5890
5842 free_extent_map(em); 5891 free_extent_map(em);
5843 5892
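
The new create-path code grows i_size while the extent range is still locked, so buffered readers who take the same lock observe the updated size the moment it is dropped. Sketched under the assumption that the caller already holds the range lock:

#include <linux/fs.h>

/* Caller must hold the extent-range lock covering [start, start + len). */
static void extend_isize_locked(struct inode *inode, loff_t start, loff_t len)
{
	if (start + len > i_size_read(inode))
		i_size_write(inode, start + len);
}
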
@@ -6320,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6320 */ 6369 */
6321 ordered = btrfs_lookup_ordered_range(inode, lockstart, 6370 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6322 lockend - lockstart + 1); 6371 lockend - lockstart + 1);
6323 if (!ordered) 6372
6373 /*
6374 * We need to make sure there are no buffered pages in this
6375 * range either, we could have raced between the invalidate in
6376 * generic_file_direct_write and locking the extent. The
6377 * invalidate needs to happen so that reads after a write do not
6378 * get stale data.
6379 */
6380 if (!ordered && (!writing ||
6381 !test_range_bit(&BTRFS_I(inode)->io_tree,
6382 lockstart, lockend, EXTENT_UPTODATE, 0,
6383 cached_state)))
6324 break; 6384 break;
6385
6325 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6386 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6326 &cached_state, GFP_NOFS); 6387 &cached_state, GFP_NOFS);
6327 btrfs_start_ordered_extent(inode, ordered, 1); 6388
6328 btrfs_put_ordered_extent(ordered); 6389 if (ordered) {
6390 btrfs_start_ordered_extent(inode, ordered, 1);
6391 btrfs_put_ordered_extent(ordered);
6392 } else {
6393 /* Screw you mmap */
6394 ret = filemap_write_and_wait_range(file->f_mapping,
6395 lockstart,
6396 lockend);
6397 if (ret)
6398 goto out;
6399
6400 /*
6401 * If we found a page that couldn't be invalidated just
6402 * fall back to buffered.
6403 */
6404 ret = invalidate_inode_pages2_range(file->f_mapping,
6405 lockstart >> PAGE_CACHE_SHIFT,
6406 lockend >> PAGE_CACHE_SHIFT);
6407 if (ret) {
6408 if (ret == -EBUSY)
6409 ret = 0;
6410 goto out;
6411 }
6412 }
6413
6329 cond_resched(); 6414 cond_resched();
6330 } 6415 }
6331 6416
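
The reworked loop has a lock/check/unlock/resolve/retry shape: take the extent lock, and only proceed with direct I/O once the range has neither an ordered extent nor a stale uptodate page; otherwise drop the lock, resolve whichever obstacle was found, and go around again. A compressed sketch, with hypothetical helper names standing in for the btrfs primitives:

typedef unsigned long long u64;

struct ordered_extent;

/* Hypothetical stand-ins for the btrfs primitives used above. */
void lock_range(u64 start, u64 end);
void unlock_range(u64 start, u64 end);
struct ordered_extent *lookup_ordered(u64 start, u64 len);
int range_has_buffered_pages(u64 start, u64 end);
void wait_on_and_put(struct ordered_extent *o);
int flush_and_invalidate(u64 start, u64 end);

static int quiesce_range_for_dio(u64 start, u64 end)
{
	struct ordered_extent *ordered;

	for (;;) {
		lock_range(start, end);
		ordered = lookup_ordered(start, end - start + 1);
		if (!ordered && !range_has_buffered_pages(start, end))
			return 0;	/* locked and quiet: safe to issue DIO */
		unlock_range(start, end);
		if (ordered)
			wait_on_and_put(ordered);   /* drain in-flight write */
		else if (flush_and_invalidate(start, end))
			return -EBUSY;	/* page would not go away: fall back */
	}
}
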
@@ -7054,10 +7139,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode)
7054 else 7139 else
7055 b_inode->flags &= ~BTRFS_INODE_NODATACOW; 7140 b_inode->flags &= ~BTRFS_INODE_NODATACOW;
7056 7141
7057 if (b_dir->flags & BTRFS_INODE_COMPRESS) 7142 if (b_dir->flags & BTRFS_INODE_COMPRESS) {
7058 b_inode->flags |= BTRFS_INODE_COMPRESS; 7143 b_inode->flags |= BTRFS_INODE_COMPRESS;
7059 else 7144 b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
7060 b_inode->flags &= ~BTRFS_INODE_COMPRESS; 7145 } else {
7146 b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
7147 BTRFS_INODE_NOCOMPRESS);
7148 }
7061} 7149}
7062 7150
7063static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, 7151static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 24b776c08d99..0e92e5763005 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -52,6 +52,7 @@
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h" 53#include "inode-map.h"
54#include "backref.h" 54#include "backref.h"
55#include "rcu-string.h"
55 56
56/* Mask out flags that are inappropriate for the given type of inode. */ 57/* Mask out flags that are inappropriate for the given type of inode. */
57static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 58static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -785,39 +786,57 @@ none:
785 return -ENOENT; 786 return -ENOENT;
786} 787}
787 788
788/* 789static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
789 * Validaty check of prev em and next em:
790 * 1) no prev/next em
791 * 2) prev/next em is an hole/inline extent
792 */
793static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
794{ 790{
795 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 791 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
796 struct extent_map *prev = NULL, *next = NULL; 792 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
797 int ret = 0; 793 struct extent_map *em;
794 u64 len = PAGE_CACHE_SIZE;
798 795
796 /*
797 * hopefully we have this extent in the tree already, try without
798 * the full extent lock
799 */
799 read_lock(&em_tree->lock); 800 read_lock(&em_tree->lock);
800 prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); 801 em = lookup_extent_mapping(em_tree, start, len);
801 next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
802 read_unlock(&em_tree->lock); 802 read_unlock(&em_tree->lock);
803 803
804 if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && 804 if (!em) {
805 (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) 805 /* get the big lock and read metadata off disk */
806 ret = 1; 806 lock_extent(io_tree, start, start + len - 1);
807 free_extent_map(prev); 807 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
808 free_extent_map(next); 808 unlock_extent(io_tree, start, start + len - 1);
809
810 if (IS_ERR(em))
811 return NULL;
812 }
813
814 return em;
815}
816
817static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
818{
819 struct extent_map *next;
820 bool ret = true;
809 821
822 /* this is the last extent */
823 if (em->start + em->len >= i_size_read(inode))
824 return false;
825
826 next = defrag_lookup_extent(inode, em->start + em->len);
827 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
828 ret = false;
829
830 free_extent_map(next);
810 return ret; 831 return ret;
811} 832}
812 833
813static int should_defrag_range(struct inode *inode, u64 start, u64 len, 834static int should_defrag_range(struct inode *inode, u64 start, int thresh,
814 int thresh, u64 *last_len, u64 *skip, 835 u64 *last_len, u64 *skip, u64 *defrag_end)
815 u64 *defrag_end)
816{ 836{
817 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 837 struct extent_map *em;
818 struct extent_map *em = NULL;
819 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
820 int ret = 1; 838 int ret = 1;
839 bool next_mergeable = true;
821 840
822 /* 841 /*
823 * make sure that once we start defragging an extent, we keep on 842 * make sure that once we start defragging an extent, we keep on
@@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
828 847
829 *skip = 0; 848 *skip = 0;
830 849
831 /* 850 em = defrag_lookup_extent(inode, start);
832 * hopefully we have this extent in the tree already, try without 851 if (!em)
833 * the full extent lock 852 return 0;
834 */
835 read_lock(&em_tree->lock);
836 em = lookup_extent_mapping(em_tree, start, len);
837 read_unlock(&em_tree->lock);
838
839 if (!em) {
840 /* get the big lock and read metadata off disk */
841 lock_extent(io_tree, start, start + len - 1);
842 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
843 unlock_extent(io_tree, start, start + len - 1);
844
845 if (IS_ERR(em))
846 return 0;
847 }
848 853
849 /* this will cover holes, and inline extents */ 854 /* this will cover holes, and inline extents */
850 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 855 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
@@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
852 goto out; 857 goto out;
853 } 858 }
854 859
855 /* If we have nothing to merge with us, just skip. */ 860 next_mergeable = defrag_check_next_extent(inode, em);
856 if (check_adjacent_extents(inode, em)) {
857 ret = 0;
858 goto out;
859 }
860 861
861 /* 862 /*
862 * we hit a real extent, if it is big don't bother defragging it again 863 * we hit a real extent, if it is big or the next extent is not a
864 * real extent, don't bother defragging it
863 */ 865 */
864 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) 866 if ((*last_len == 0 || *last_len >= thresh) &&
867 (em->len >= thresh || !next_mergeable))
865 ret = 0; 868 ret = 0;
866
867out: 869out:
868 /* 870 /*
869 * last_len ends up being a counter of how many bytes we've defragged. 871 * last_len ends up being a counter of how many bytes we've defragged.
@@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1142 break; 1144 break;
1143 1145
1144 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1146 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1145 PAGE_CACHE_SIZE, extent_thresh, 1147 extent_thresh, &last_len, &skip,
1146 &last_len, &skip, &defrag_end)) { 1148 &defrag_end)) {
1147 unsigned long next; 1149 unsigned long next;
1148 /* 1150 /*
1149 * the should_defrag function tells us how much to skip 1151 * the should_defrag function tells us how much to skip
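
The net effect of the refactor is a simpler decision: an extent is skipped when no defrag run is already in progress and it is either already large or has no real next extent to merge with. The predicate, restated as a standalone sketch:

typedef unsigned long long u64;

/* Returns nonzero when the extent at hand is worth defragmenting. */
static int worth_defragging(u64 last_len, u64 em_len, u64 thresh,
			    int next_mergeable)
{
	/* once a run has started, keep going until it reaches the threshold */
	if (last_len != 0 && last_len < thresh)
		return 1;
	/* big extent, or nothing real to merge into: leave it alone */
	if (em_len >= thresh || !next_mergeable)
		return 0;
	return 1;
}
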
@@ -1304,6 +1306,14 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1304 ret = -EINVAL; 1306 ret = -EINVAL;
1305 goto out_free; 1307 goto out_free;
1306 } 1308 }
1309 if (device->fs_devices && device->fs_devices->seeding) {
1310 printk(KERN_INFO "btrfs: resizer unable to apply on "
1311 "seeding device %llu\n",
1312 (unsigned long long)devid);
1313 ret = -EINVAL;
1314 goto out_free;
1315 }
1316
1307 if (!strcmp(sizestr, "max")) 1317 if (!strcmp(sizestr, "max"))
1308 new_size = device->bdev->bd_inode->i_size; 1318 new_size = device->bdev->bd_inode->i_size;
1309 else { 1319 else {
@@ -1345,8 +1355,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1345 do_div(new_size, root->sectorsize); 1355 do_div(new_size, root->sectorsize);
1346 new_size *= root->sectorsize; 1356 new_size *= root->sectorsize;
1347 1357
1348 printk(KERN_INFO "btrfs: new size for %s is %llu\n", 1358 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1349 device->name, (unsigned long long)new_size); 1359 rcu_str_deref(device->name),
1360 (unsigned long long)new_size);
1350 1361
1351 if (new_size > old_size) { 1362 if (new_size > old_size) {
1352 trans = btrfs_start_transaction(root, 0); 1363 trans = btrfs_start_transaction(root, 0);
@@ -2264,7 +2275,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2264 di_args->total_bytes = dev->total_bytes; 2275 di_args->total_bytes = dev->total_bytes;
2265 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2276 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2266 if (dev->name) { 2277 if (dev->name) {
2267 strncpy(di_args->path, dev->name, sizeof(di_args->path)); 2278 struct rcu_string *name;
2279
2280 rcu_read_lock();
2281 name = rcu_dereference(dev->name);
2282 strncpy(di_args->path, name->str, sizeof(di_args->path));
2283 rcu_read_unlock();
2268 di_args->path[sizeof(di_args->path) - 1] = 0; 2284 di_args->path[sizeof(di_args->path) - 1] = 0;
2269 } else { 2285 } else {
2270 di_args->path[0] = '\0'; 2286 di_args->path[0] = '\0';
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf..e440aa653c30 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 339#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ 340#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
341 struct btrfs_ioctl_vol_args_v2) 341 struct btrfs_ioctl_vol_args_v2)
342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) 342#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) 343#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ 344#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
345 struct btrfs_ioctl_scrub_args) 345 struct btrfs_ioctl_scrub_args)
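
The one-character change above is a real fix: ioctl direction macros are named from userspace's point of view, so a GETFLAGS call that fills in a __u64 for the caller must be _IOR (kernel writes to the user), while SETFLAGS stays _IOW (kernel reads from the user). For example, with an illustrative magic value that is not btrfs's:

#include <linux/ioctl.h>
#include <linux/types.h>

#define EX_IOCTL_MAGIC	0xE0	/* illustrative magic number */

#define EX_GETFLAGS	_IOR(EX_IOCTL_MAGIC, 25, __u64)	/* kernel -> user */
#define EX_SETFLAGS	_IOW(EX_IOCTL_MAGIC, 26, __u64)	/* user -> kernel */
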
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9e138cdc36c5..643335a4fe3c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
627 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
628 * extents 628 * extents
629 */ 629 */
630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end); 630 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
631
632 /*
633 * So with compression we will find and lock a dirty page and clear the
634 * first one as dirty, setup an async extent, and immediately return
635 * with the entire range locked but with nobody actually marked with
636 * writeback. So we can't just filemap_write_and_wait_range() and
637 * expect it to work since it will just kick off a thread to do the
638 * actual work. So we need to call filemap_fdatawrite_range _again_
639 * since it will wait on the page lock, which won't be unlocked until
640 * after the pages have been marked as writeback and so we're good to go
641 * from there. We have to do this otherwise we'll miss the ordered
642 * extents and that results in badness. Please Josef, do not think you
643 * know better and pull this out at some point in the future, it is
644 * right and you are wrong.
645 */
646 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
647 &BTRFS_I(inode)->runtime_flags))
648 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
649
650 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
631 651
632 end = orig_end; 652 end = orig_end;
633 found = 0; 653 found = 0;
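
The long comment above is the whole story: with async compression a first fdatawrite can return while pages are still locked but not yet tagged writeback, so fdatawait alone would miss them; a second fdatawrite blocks on those page locks and closes the window. The resulting shape, sketched below (the bool parameter mirrors the BTRFS_INODE_HAS_ASYNC_EXTENT test):

#include <linux/fs.h>

static void write_and_wait_range(struct address_space *mapping,
				 loff_t start, loff_t end,
				 bool has_async_extent)
{
	filemap_fdatawrite_range(mapping, start, end);	/* kick writeback */
	/*
	 * A second kick blocks on pages the async path left locked, so
	 * they are tagged writeback before we wait on them below.
	 */
	if (has_async_extent)
		filemap_fdatawrite_range(mapping, start, end);
	filemap_fdatawait_range(mapping, start, end);
}
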
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
new file mode 100644
index 000000000000..9e111e4576d4
--- /dev/null
+++ b/fs/btrfs/rcu-string.h
@@ -0,0 +1,56 @@
1/*
2 * Copyright (C) 2012 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19struct rcu_string {
20 struct rcu_head rcu;
21 char str[0];
22};
23
24static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
25{
26 size_t len = strlen(src) + 1;
27 struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) +
28 (len * sizeof(char)), mask);
29 if (!ret)
30 return ret;
31 strncpy(ret->str, src, len);
32 return ret;
33}
34
35static inline void rcu_string_free(struct rcu_string *str)
36{
37 if (str)
38 kfree_rcu(str, rcu);
39}
40
41#define printk_in_rcu(fmt, ...) do { \
42 rcu_read_lock(); \
43 printk(fmt, __VA_ARGS__); \
44 rcu_read_unlock(); \
45} while (0)
46
47#define printk_ratelimited_in_rcu(fmt, ...) do { \
48 rcu_read_lock(); \
49 printk_ratelimited(fmt, __VA_ARGS__); \
50 rcu_read_unlock(); \
51} while (0)
52
53#define rcu_str_deref(rcu_str) ({ \
54 struct rcu_string *__str = rcu_dereference(rcu_str); \
55 __str->str; \
56})
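
The new header gives btrfs an RCU-managed device-name string: a writer publishes a fresh copy with rcu_assign_pointer() and frees the old one after a grace period via kfree_rcu(), while readers dereference it inside rcu_read_lock(). A hedged usage sketch; struct my_device and the serialization comment are illustrative, the rest is the header's own API plus standard RCU calls:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/printk.h>

struct my_device {
	struct rcu_string __rcu *name;
};

/* Caller serializes writers (e.g. holds a device-list mutex). */
static int my_device_rename(struct my_device *dev, const char *path)
{
	struct rcu_string *new_name, *old_name;

	new_name = rcu_string_strdup(path, GFP_NOFS);
	if (!new_name)
		return -ENOMEM;
	old_name = rcu_dereference_protected(dev->name, 1);
	rcu_assign_pointer(dev->name, new_name);
	rcu_string_free(old_name);  /* actual kfree after a grace period */
	return 0;
}

static void my_device_log(struct my_device *dev)
{
	rcu_read_lock();
	printk(KERN_INFO "device path: %s\n", rcu_str_deref(dev->name));
	rcu_read_unlock();
}
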
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a38cfa4f251e..b223620cd5a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -26,6 +26,7 @@
26#include "backref.h" 26#include "backref.h"
27#include "extent_io.h" 27#include "extent_io.h"
28#include "check-integrity.h" 28#include "check-integrity.h"
29#include "rcu-string.h"
29 30
30/* 31/*
31 * This is only the first step towards a full-features scrub. It reads all 32 * This is only the first step towards a full-features scrub. It reads all
@@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
320 * hold all of the paths here 321 * hold all of the paths here
321 */ 322 */
322 for (i = 0; i < ipath->fspath->elem_cnt; ++i) 323 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
323 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 324 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
324 "%s, sector %llu, root %llu, inode %llu, offset %llu, " 325 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
325 "length %llu, links %u (path: %s)\n", swarn->errstr, 326 "length %llu, links %u (path: %s)\n", swarn->errstr,
326 swarn->logical, swarn->dev->name, 327 swarn->logical, rcu_str_deref(swarn->dev->name),
327 (unsigned long long)swarn->sector, root, inum, offset, 328 (unsigned long long)swarn->sector, root, inum, offset,
328 min(isize - offset, (u64)PAGE_SIZE), nlink, 329 min(isize - offset, (u64)PAGE_SIZE), nlink,
329 (char *)(unsigned long)ipath->fspath->val[i]); 330 (char *)(unsigned long)ipath->fspath->val[i]);
@@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
332 return 0; 333 return 0;
333 334
334err: 335err:
335 printk(KERN_WARNING "btrfs: %s at logical %llu on dev " 336 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
336 "%s, sector %llu, root %llu, inode %llu, offset %llu: path " 337 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
337 "resolving failed with ret=%d\n", swarn->errstr, 338 "resolving failed with ret=%d\n", swarn->errstr,
338 swarn->logical, swarn->dev->name, 339 swarn->logical, rcu_str_deref(swarn->dev->name),
339 (unsigned long long)swarn->sector, root, inum, offset, ret); 340 (unsigned long long)swarn->sector, root, inum, offset, ret);
340 341
341 free_ipath(ipath); 342 free_ipath(ipath);
@@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
390 do { 391 do {
391 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 392 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
392 &ref_root, &ref_level); 393 &ref_root, &ref_level);
393 printk(KERN_WARNING 394 printk_in_rcu(KERN_WARNING
394 "btrfs: %s at logical %llu on dev %s, " 395 "btrfs: %s at logical %llu on dev %s, "
395 "sector %llu: metadata %s (level %d) in tree " 396 "sector %llu: metadata %s (level %d) in tree "
396 "%llu\n", errstr, swarn.logical, dev->name, 397 "%llu\n", errstr, swarn.logical,
398 rcu_str_deref(dev->name),
397 (unsigned long long)swarn.sector, 399 (unsigned long long)swarn.sector,
398 ref_level ? "node" : "leaf", 400 ref_level ? "node" : "leaf",
399 ret < 0 ? -1 : ref_level, 401 ret < 0 ? -1 : ref_level,
@@ -580,9 +582,11 @@ out:
580 spin_lock(&sdev->stat_lock); 582 spin_lock(&sdev->stat_lock);
581 ++sdev->stat.uncorrectable_errors; 583 ++sdev->stat.uncorrectable_errors;
582 spin_unlock(&sdev->stat_lock); 584 spin_unlock(&sdev->stat_lock);
583 printk_ratelimited(KERN_ERR 585
586 printk_ratelimited_in_rcu(KERN_ERR
584 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 587 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
585 (unsigned long long)fixup->logical, sdev->dev->name); 588 (unsigned long long)fixup->logical,
589 rcu_str_deref(sdev->dev->name));
586 } 590 }
587 591
588 btrfs_free_path(path); 592 btrfs_free_path(path);
@@ -936,18 +940,20 @@ corrected_error:
936 spin_lock(&sdev->stat_lock); 940 spin_lock(&sdev->stat_lock);
937 sdev->stat.corrected_errors++; 941 sdev->stat.corrected_errors++;
938 spin_unlock(&sdev->stat_lock); 942 spin_unlock(&sdev->stat_lock);
939 printk_ratelimited(KERN_ERR 943 printk_ratelimited_in_rcu(KERN_ERR
940 "btrfs: fixed up error at logical %llu on dev %s\n", 944 "btrfs: fixed up error at logical %llu on dev %s\n",
941 (unsigned long long)logical, sdev->dev->name); 945 (unsigned long long)logical,
946 rcu_str_deref(sdev->dev->name));
942 } 947 }
943 } else { 948 } else {
944did_not_correct_error: 949did_not_correct_error:
945 spin_lock(&sdev->stat_lock); 950 spin_lock(&sdev->stat_lock);
946 sdev->stat.uncorrectable_errors++; 951 sdev->stat.uncorrectable_errors++;
947 spin_unlock(&sdev->stat_lock); 952 spin_unlock(&sdev->stat_lock);
948 printk_ratelimited(KERN_ERR 953 printk_ratelimited_in_rcu(KERN_ERR
949 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 954 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
950 (unsigned long long)logical, sdev->dev->name); 955 (unsigned long long)logical,
956 rcu_str_deref(sdev->dev->name));
951 } 957 }
952 958
953out: 959out:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 96eb9fef7bd2..e23991574fdf 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,7 @@
54#include "version.h" 54#include "version.h"
55#include "export.h" 55#include "export.h"
56#include "compression.h" 56#include "compression.h"
57#include "rcu-string.h"
57 58
58#define CREATE_TRACE_POINTS 59#define CREATE_TRACE_POINTS
59#include <trace/events/btrfs.h> 60#include <trace/events/btrfs.h>
@@ -1186,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1186 if (ret) 1187 if (ret)
1187 goto restore; 1188 goto restore;
1188 1189
1190 ret = btrfs_resume_balance_async(fs_info);
1191 if (ret)
1192 goto restore;
1193
1189 sb->s_flags &= ~MS_RDONLY; 1194 sb->s_flags &= ~MS_RDONLY;
1190 } 1195 }
1191 1196
@@ -1482,12 +1487,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1482 "error %d\n", btrfs_ino(inode), ret); 1487 "error %d\n", btrfs_ino(inode), ret);
1483} 1488}
1484 1489
1490static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1491{
1492 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
1493 struct btrfs_fs_devices *cur_devices;
1494 struct btrfs_device *dev, *first_dev = NULL;
1495 struct list_head *head;
1496 struct rcu_string *name;
1497
1498 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1499 cur_devices = fs_info->fs_devices;
1500 while (cur_devices) {
1501 head = &cur_devices->devices;
1502 list_for_each_entry(dev, head, dev_list) {
1503 if (!first_dev || dev->devid < first_dev->devid)
1504 first_dev = dev;
1505 }
1506 cur_devices = cur_devices->seed;
1507 }
1508
1509 if (first_dev) {
1510 rcu_read_lock();
1511 name = rcu_dereference(first_dev->name);
1512 seq_escape(m, name->str, " \t\n\\");
1513 rcu_read_unlock();
1514 } else {
1515 WARN_ON(1);
1516 }
1517 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1518 return 0;
1519}
1520
1485static const struct super_operations btrfs_super_ops = { 1521static const struct super_operations btrfs_super_ops = {
1486 .drop_inode = btrfs_drop_inode, 1522 .drop_inode = btrfs_drop_inode,
1487 .evict_inode = btrfs_evict_inode, 1523 .evict_inode = btrfs_evict_inode,
1488 .put_super = btrfs_put_super, 1524 .put_super = btrfs_put_super,
1489 .sync_fs = btrfs_sync_fs, 1525 .sync_fs = btrfs_sync_fs,
1490 .show_options = btrfs_show_options, 1526 .show_options = btrfs_show_options,
1527 .show_devname = btrfs_show_devname,
1491 .write_inode = btrfs_write_inode, 1528 .write_inode = btrfs_write_inode,
1492 .dirty_inode = btrfs_fs_dirty_inode, 1529 .dirty_inode = btrfs_fs_dirty_inode,
1493 .alloc_inode = btrfs_alloc_inode, 1530 .alloc_inode = btrfs_alloc_inode,
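
btrfs_show_devname() walks the main device list plus the chain of seed filesystems and reports the device with the smallest devid, so /proc/mounts always shows the same stable name for a multi-device filesystem. The traversal, reduced to a sketch with illustrative types:

typedef unsigned long long u64;

struct dev { u64 devid; struct dev *next; };
struct dev_set { struct dev *devices; struct dev_set *seed; };

static struct dev *lowest_devid(struct dev_set *set)
{
	struct dev *best = NULL, *d;

	for (; set; set = set->seed)	/* main set, then the seed chain */
		for (d = set->devices; d; d = d->next)
			if (!best || d->devid < best->devid)
				best = d;
	return best;
}
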
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1791c6e3d834..b72b068183ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -100,6 +100,10 @@ loop:
100 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 100 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
101 cur_trans = fs_info->running_transaction; 101 cur_trans = fs_info->running_transaction;
102 goto loop; 102 goto loop;
103 } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
104 spin_unlock(&root->fs_info->trans_lock);
105 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
106 return -EROFS;
103 } 107 }
104 108
105 atomic_set(&cur_trans->num_writers, 1); 109 atomic_set(&cur_trans->num_writers, 1);
@@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1213 1217
1214 1218
1215static void cleanup_transaction(struct btrfs_trans_handle *trans, 1219static void cleanup_transaction(struct btrfs_trans_handle *trans,
1216 struct btrfs_root *root) 1220 struct btrfs_root *root, int err)
1217{ 1221{
1218 struct btrfs_transaction *cur_trans = trans->transaction; 1222 struct btrfs_transaction *cur_trans = trans->transaction;
1219 1223
1220 WARN_ON(trans->use_count > 1); 1224 WARN_ON(trans->use_count > 1);
1221 1225
1226 btrfs_abort_transaction(trans, root, err);
1227
1222 spin_lock(&root->fs_info->trans_lock); 1228 spin_lock(&root->fs_info->trans_lock);
1223 list_del_init(&cur_trans->list); 1229 list_del_init(&cur_trans->list);
1230 if (cur_trans == root->fs_info->running_transaction) {
1231 root->fs_info->running_transaction = NULL;
1232 root->fs_info->trans_no_join = 0;
1233 }
1224 spin_unlock(&root->fs_info->trans_lock); 1234 spin_unlock(&root->fs_info->trans_lock);
1225 1235
1226 btrfs_cleanup_one_transaction(trans->transaction, root); 1236 btrfs_cleanup_one_transaction(trans->transaction, root);
@@ -1526,7 +1536,7 @@ cleanup_transaction:
1526// WARN_ON(1); 1536// WARN_ON(1);
1527 if (current->journal_info == trans) 1537 if (current->journal_info == trans)
1528 current->journal_info = NULL; 1538 current->journal_info = NULL;
1529 cleanup_transaction(trans, root); 1539 cleanup_transaction(trans, root, ret);
1530 1540
1531 return ret; 1541 return ret;
1532} 1542}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511c..8abeae4224f9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
690 kfree(name); 690 kfree(name);
691 691
692 iput(inode); 692 iput(inode);
693
694 btrfs_run_delayed_items(trans, root);
693 return ret; 695 return ret;
694} 696}
695 697
@@ -895,6 +897,7 @@ again:
895 ret = btrfs_unlink_inode(trans, root, dir, 897 ret = btrfs_unlink_inode(trans, root, dir,
896 inode, victim_name, 898 inode, victim_name,
897 victim_name_len); 899 victim_name_len);
900 btrfs_run_delayed_items(trans, root);
898 } 901 }
899 kfree(victim_name); 902 kfree(victim_name);
900 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 903 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
1475 ret = btrfs_unlink_inode(trans, root, dir, inode, 1478 ret = btrfs_unlink_inode(trans, root, dir, inode,
1476 name, name_len); 1479 name, name_len);
1477 BUG_ON(ret); 1480 BUG_ON(ret);
1481
1482 btrfs_run_delayed_items(trans, root);
1483
1478 kfree(name); 1484 kfree(name);
1479 iput(inode); 1485 iput(inode);
1480 1486
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7782020996fe..ecaad40e7ef4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@
35#include "volumes.h" 35#include "volumes.h"
36#include "async-thread.h" 36#include "async-thread.h"
37#include "check-integrity.h" 37#include "check-integrity.h"
38#include "rcu-string.h"
38 39
39static int init_first_rw_device(struct btrfs_trans_handle *trans, 40static int init_first_rw_device(struct btrfs_trans_handle *trans,
40 struct btrfs_root *root, 41 struct btrfs_root *root,
@@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
64 device = list_entry(fs_devices->devices.next, 65 device = list_entry(fs_devices->devices.next,
65 struct btrfs_device, dev_list); 66 struct btrfs_device, dev_list);
66 list_del(&device->dev_list); 67 list_del(&device->dev_list);
67 kfree(device->name); 68 rcu_string_free(device->name);
68 kfree(device); 69 kfree(device);
69 } 70 }
70 kfree(fs_devices); 71 kfree(fs_devices);
@@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,
334{ 335{
335 struct btrfs_device *device; 336 struct btrfs_device *device;
336 struct btrfs_fs_devices *fs_devices; 337 struct btrfs_fs_devices *fs_devices;
338 struct rcu_string *name;
337 u64 found_transid = btrfs_super_generation(disk_super); 339 u64 found_transid = btrfs_super_generation(disk_super);
338 char *name;
339 340
340 fs_devices = find_fsid(disk_super->fsid); 341 fs_devices = find_fsid(disk_super->fsid);
341 if (!fs_devices) { 342 if (!fs_devices) {
@@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,
369 memcpy(device->uuid, disk_super->dev_item.uuid, 370 memcpy(device->uuid, disk_super->dev_item.uuid,
370 BTRFS_UUID_SIZE); 371 BTRFS_UUID_SIZE);
371 spin_lock_init(&device->io_lock); 372 spin_lock_init(&device->io_lock);
372 device->name = kstrdup(path, GFP_NOFS); 373
373 if (!device->name) { 374 name = rcu_string_strdup(path, GFP_NOFS);
375 if (!name) {
374 kfree(device); 376 kfree(device);
375 return -ENOMEM; 377 return -ENOMEM;
376 } 378 }
379 rcu_assign_pointer(device->name, name);
377 INIT_LIST_HEAD(&device->dev_alloc_list); 380 INIT_LIST_HEAD(&device->dev_alloc_list);
378 381
379 /* init readahead state */ 382 /* init readahead state */
@@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,
390 393
391 device->fs_devices = fs_devices; 394 device->fs_devices = fs_devices;
392 fs_devices->num_devices++; 395 fs_devices->num_devices++;
393 } else if (!device->name || strcmp(device->name, path)) { 396 } else if (!device->name || strcmp(device->name->str, path)) {
394 name = kstrdup(path, GFP_NOFS); 397 name = rcu_string_strdup(path, GFP_NOFS);
395 if (!name) 398 if (!name)
396 return -ENOMEM; 399 return -ENOMEM;
397 kfree(device->name); 400 rcu_string_free(device->name);
398 device->name = name; 401 rcu_assign_pointer(device->name, name);
399 if (device->missing) { 402 if (device->missing) {
400 fs_devices->missing_devices--; 403 fs_devices->missing_devices--;
401 device->missing = 0; 404 device->missing = 0;
@@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
430 433
431 /* We have held the volume lock, it is safe to get the devices. */ 434 /* We have held the volume lock, it is safe to get the devices. */
432 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 435 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
436 struct rcu_string *name;
437
433 device = kzalloc(sizeof(*device), GFP_NOFS); 438 device = kzalloc(sizeof(*device), GFP_NOFS);
434 if (!device) 439 if (!device)
435 goto error; 440 goto error;
436 441
437 device->name = kstrdup(orig_dev->name, GFP_NOFS); 442 /*
438 if (!device->name) { 443 * This is ok to do without rcu read locked because we hold the
444 * uuid mutex so nothing we touch in here is going to disappear.
445 */
446 name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
447 if (!name) {
439 kfree(device); 448 kfree(device);
440 goto error; 449 goto error;
441 } 450 }
451 rcu_assign_pointer(device->name, name);
442 452
443 device->devid = orig_dev->devid; 453 device->devid = orig_dev->devid;
444 device->work.func = pending_bios_fn; 454 device->work.func = pending_bios_fn;
@@ -491,7 +501,7 @@ again:
491 } 501 }
492 list_del_init(&device->dev_list); 502 list_del_init(&device->dev_list);
493 fs_devices->num_devices--; 503 fs_devices->num_devices--;
494 kfree(device->name); 504 rcu_string_free(device->name);
495 kfree(device); 505 kfree(device);
496 } 506 }
497 507
@@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)
516 if (device->bdev) 526 if (device->bdev)
517 blkdev_put(device->bdev, device->mode); 527 blkdev_put(device->bdev, device->mode);
518 528
519 kfree(device->name); 529 rcu_string_free(device->name);
520 kfree(device); 530 kfree(device);
521} 531}
522 532
@@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
540 mutex_lock(&fs_devices->device_list_mutex); 550 mutex_lock(&fs_devices->device_list_mutex);
541 list_for_each_entry(device, &fs_devices->devices, dev_list) { 551 list_for_each_entry(device, &fs_devices->devices, dev_list) {
542 struct btrfs_device *new_device; 552 struct btrfs_device *new_device;
553 struct rcu_string *name;
543 554
544 if (device->bdev) 555 if (device->bdev)
545 fs_devices->open_devices--; 556 fs_devices->open_devices--;
@@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
555 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 566 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
556 BUG_ON(!new_device); /* -ENOMEM */ 567 BUG_ON(!new_device); /* -ENOMEM */
557 memcpy(new_device, device, sizeof(*new_device)); 568 memcpy(new_device, device, sizeof(*new_device));
558 new_device->name = kstrdup(device->name, GFP_NOFS); 569
559 BUG_ON(device->name && !new_device->name); /* -ENOMEM */ 570 /* Safe because we are under uuid_mutex */
571 name = rcu_string_strdup(device->name->str, GFP_NOFS);
572 BUG_ON(device->name && !name); /* -ENOMEM */
573 rcu_assign_pointer(new_device->name, name);
560 new_device->bdev = NULL; 574 new_device->bdev = NULL;
561 new_device->writeable = 0; 575 new_device->writeable = 0;
562 new_device->in_fs_metadata = 0; 576 new_device->in_fs_metadata = 0;
@@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
621 if (!device->name) 635 if (!device->name)
622 continue; 636 continue;
623 637
624 bdev = blkdev_get_by_path(device->name, flags, holder); 638 bdev = blkdev_get_by_path(device->name->str, flags, holder);
625 if (IS_ERR(bdev)) { 639 if (IS_ERR(bdev)) {
626 printk(KERN_INFO "open %s failed\n", device->name); 640 printk(KERN_INFO "open %s failed\n", device->name->str);
627 goto error; 641 goto error;
628 } 642 }
629 filemap_write_and_wait(bdev->bd_inode->i_mapping); 643 filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1632 struct block_device *bdev; 1646 struct block_device *bdev;
1633 struct list_head *devices; 1647 struct list_head *devices;
1634 struct super_block *sb = root->fs_info->sb; 1648 struct super_block *sb = root->fs_info->sb;
1649 struct rcu_string *name;
1635 u64 total_bytes; 1650 u64 total_bytes;
1636 int seeding_dev = 0; 1651 int seeding_dev = 0;
1637 int ret = 0; 1652 int ret = 0;
@@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1671 goto error; 1686 goto error;
1672 } 1687 }
1673 1688
1674 device->name = kstrdup(device_path, GFP_NOFS); 1689 name = rcu_string_strdup(device_path, GFP_NOFS);
1675 if (!device->name) { 1690 if (!name) {
1676 kfree(device); 1691 kfree(device);
1677 ret = -ENOMEM; 1692 ret = -ENOMEM;
1678 goto error; 1693 goto error;
1679 } 1694 }
1695 rcu_assign_pointer(device->name, name);
1680 1696
1681 ret = find_next_devid(root, &device->devid); 1697 ret = find_next_devid(root, &device->devid);
1682 if (ret) { 1698 if (ret) {
1683 kfree(device->name); 1699 rcu_string_free(device->name);
1684 kfree(device); 1700 kfree(device);
1685 goto error; 1701 goto error;
1686 } 1702 }
1687 1703
1688 trans = btrfs_start_transaction(root, 0); 1704 trans = btrfs_start_transaction(root, 0);
1689 if (IS_ERR(trans)) { 1705 if (IS_ERR(trans)) {
1690 kfree(device->name); 1706 rcu_string_free(device->name);
1691 kfree(device); 1707 kfree(device);
1692 ret = PTR_ERR(trans); 1708 ret = PTR_ERR(trans);
1693 goto error; 1709 goto error;
@@ -1796,7 +1812,7 @@ error_trans:
1796 unlock_chunks(root); 1812 unlock_chunks(root);
1797 btrfs_abort_transaction(trans, root, ret); 1813 btrfs_abort_transaction(trans, root, ret);
1798 btrfs_end_transaction(trans, root); 1814 btrfs_end_transaction(trans, root);
1799 kfree(device->name); 1815 rcu_string_free(device->name);
1800 kfree(device); 1816 kfree(device);
1801error: 1817error:
1802 blkdev_put(bdev, FMODE_EXCL); 1818 blkdev_put(bdev, FMODE_EXCL);
@@ -2829,31 +2845,48 @@ out:
2829 2845
2830static int balance_kthread(void *data) 2846static int balance_kthread(void *data)
2831{ 2847{
2832 struct btrfs_balance_control *bctl = 2848 struct btrfs_fs_info *fs_info = data;
2833 (struct btrfs_balance_control *)data;
2834 struct btrfs_fs_info *fs_info = bctl->fs_info;
2835 int ret = 0; 2849 int ret = 0;
2836 2850
2837 mutex_lock(&fs_info->volume_mutex); 2851 mutex_lock(&fs_info->volume_mutex);
2838 mutex_lock(&fs_info->balance_mutex); 2852 mutex_lock(&fs_info->balance_mutex);
2839 2853
2840 set_balance_control(bctl); 2854 if (fs_info->balance_ctl) {
2841
2842 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2843 printk(KERN_INFO "btrfs: force skipping balance\n");
2844 } else {
2845 printk(KERN_INFO "btrfs: continuing balance\n"); 2855 printk(KERN_INFO "btrfs: continuing balance\n");
2846 ret = btrfs_balance(bctl, NULL); 2856 ret = btrfs_balance(fs_info->balance_ctl, NULL);
2847 } 2857 }
2848 2858
2849 mutex_unlock(&fs_info->balance_mutex); 2859 mutex_unlock(&fs_info->balance_mutex);
2850 mutex_unlock(&fs_info->volume_mutex); 2860 mutex_unlock(&fs_info->volume_mutex);
2861
2851 return ret; 2862 return ret;
2852} 2863}
2853 2864
2854int btrfs_recover_balance(struct btrfs_root *tree_root) 2865int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
2855{ 2866{
2856 struct task_struct *tsk; 2867 struct task_struct *tsk;
2868
2869 spin_lock(&fs_info->balance_lock);
2870 if (!fs_info->balance_ctl) {
2871 spin_unlock(&fs_info->balance_lock);
2872 return 0;
2873 }
2874 spin_unlock(&fs_info->balance_lock);
2875
2876 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2877 printk(KERN_INFO "btrfs: force skipping balance\n");
2878 return 0;
2879 }
2880
2881 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
2882 if (IS_ERR(tsk))
2883 return PTR_ERR(tsk);
2884
2885 return 0;
2886}
2887
2888int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
2889{
2857 struct btrfs_balance_control *bctl; 2890 struct btrfs_balance_control *bctl;
2858 struct btrfs_balance_item *item; 2891 struct btrfs_balance_item *item;
2859 struct btrfs_disk_balance_args disk_bargs; 2892 struct btrfs_disk_balance_args disk_bargs;
@@ -2866,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2866 if (!path) 2899 if (!path)
2867 return -ENOMEM; 2900 return -ENOMEM;
2868 2901
2869 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2870 if (!bctl) {
2871 ret = -ENOMEM;
2872 goto out;
2873 }
2874
2875 key.objectid = BTRFS_BALANCE_OBJECTID; 2902 key.objectid = BTRFS_BALANCE_OBJECTID;
2876 key.type = BTRFS_BALANCE_ITEM_KEY; 2903 key.type = BTRFS_BALANCE_ITEM_KEY;
2877 key.offset = 0; 2904 key.offset = 0;
2878 2905
2879 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); 2906 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
2880 if (ret < 0) 2907 if (ret < 0)
2881 goto out_bctl; 2908 goto out;
2882 if (ret > 0) { /* ret = -ENOENT; */ 2909 if (ret > 0) { /* ret = -ENOENT; */
2883 ret = 0; 2910 ret = 0;
2884 goto out_bctl; 2911 goto out;
2912 }
2913
2914 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2915 if (!bctl) {
2916 ret = -ENOMEM;
2917 goto out;
2885 } 2918 }
2886 2919
2887 leaf = path->nodes[0]; 2920 leaf = path->nodes[0];
2888 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); 2921 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
2889 2922
2890 bctl->fs_info = tree_root->fs_info; 2923 bctl->fs_info = fs_info;
2891 bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; 2924 bctl->flags = btrfs_balance_flags(leaf, item);
2925 bctl->flags |= BTRFS_BALANCE_RESUME;
2892 2926
2893 btrfs_balance_data(leaf, item, &disk_bargs); 2927 btrfs_balance_data(leaf, item, &disk_bargs);
2894 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); 2928 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2897,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
2897 btrfs_balance_sys(leaf, item, &disk_bargs); 2931 btrfs_balance_sys(leaf, item, &disk_bargs);
2898 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 2932 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
2899 2933
2900 tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); 2934 mutex_lock(&fs_info->volume_mutex);
2901 if (IS_ERR(tsk)) 2935 mutex_lock(&fs_info->balance_mutex);
2902 ret = PTR_ERR(tsk);
2903 else
2904 goto out;
2905 2936
2906out_bctl: 2937 set_balance_control(bctl);
2907 kfree(bctl); 2938
2939 mutex_unlock(&fs_info->balance_mutex);
2940 mutex_unlock(&fs_info->volume_mutex);
2908out: 2941out:
2909 btrfs_free_path(path); 2942 btrfs_free_path(path);
2910 return ret; 2943 return ret;
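
The recover/resume split means mount-time code only reads the balance item back from disk and installs it as balance_ctl; actually continuing the balance is deferred to a kthread kicked off later (from the remount path in super.c above, or the normal mount path). A sketch of the deferred kick, with illustrative names around the real kthread_run():

#include <linux/kthread.h>
#include <linux/err.h>

struct fs_ctx {				/* illustrative per-fs context */
	int has_pending_balance;
};

static void continue_balance(struct fs_ctx *fs);	/* illustrative */

static int balance_worker(void *data)
{
	struct fs_ctx *fs = data;

	continue_balance(fs);		/* the long-running part */
	return 0;
}

static int resume_balance_async(struct fs_ctx *fs)
{
	struct task_struct *tsk;

	if (!fs->has_pending_balance)	/* nothing recovered from disk */
		return 0;
	tsk = kthread_run(balance_worker, fs, "fs-balance");
	return IS_ERR(tsk) ? PTR_ERR(tsk) : 0;
}
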
@@ -4045,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
4045 4078
4046 BUG_ON(stripe_index >= bbio->num_stripes); 4079 BUG_ON(stripe_index >= bbio->num_stripes);
4047 dev = bbio->stripes[stripe_index].dev; 4080 dev = bbio->stripes[stripe_index].dev;
4048 if (bio->bi_rw & WRITE) 4081 if (dev->bdev) {
4049 btrfs_dev_stat_inc(dev, 4082 if (bio->bi_rw & WRITE)
4050 BTRFS_DEV_STAT_WRITE_ERRS); 4083 btrfs_dev_stat_inc(dev,
4051 else 4084 BTRFS_DEV_STAT_WRITE_ERRS);
4052 btrfs_dev_stat_inc(dev, 4085 else
4053 BTRFS_DEV_STAT_READ_ERRS); 4086 btrfs_dev_stat_inc(dev,
4054 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) 4087 BTRFS_DEV_STAT_READ_ERRS);
4055 btrfs_dev_stat_inc(dev, 4088 if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
4056 BTRFS_DEV_STAT_FLUSH_ERRS); 4089 btrfs_dev_stat_inc(dev,
4057 btrfs_dev_stat_print_on_error(dev); 4090 BTRFS_DEV_STAT_FLUSH_ERRS);
4091 btrfs_dev_stat_print_on_error(dev);
4092 }
4058 } 4093 }
4059 } 4094 }
4060 4095
@@ -4204,10 +4239,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4204 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; 4239 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4205 dev = bbio->stripes[dev_nr].dev; 4240 dev = bbio->stripes[dev_nr].dev;
4206 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { 4241 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
4242#ifdef DEBUG
4243 struct rcu_string *name;
4244
4245 rcu_read_lock();
4246 name = rcu_dereference(dev->name);
 4207 pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " 4247 pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "

4208 "(%s id %llu), size=%u\n", rw, 4248 "(%s id %llu), size=%u\n", rw,
4209 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, 4249 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4210 dev->name, dev->devid, bio->bi_size); 4250 name->str, dev->devid, bio->bi_size);
4251 rcu_read_unlock();
4252#endif
4211 bio->bi_bdev = dev->bdev; 4253 bio->bi_bdev = dev->bdev;
4212 if (async_submit) 4254 if (async_submit)
4213 schedule_bio(root, dev, rw, bio); 4255 schedule_bio(root, dev, rw, bio);
@@ -4694,8 +4736,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
4694 key.offset = device->devid; 4736 key.offset = device->devid;
4695 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); 4737 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
4696 if (ret) { 4738 if (ret) {
4697 printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", 4739 printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
4698 device->name, (unsigned long long)device->devid); 4740 rcu_str_deref(device->name),
4741 (unsigned long long)device->devid);
4699 __btrfs_reset_dev_stats(device); 4742 __btrfs_reset_dev_stats(device);
4700 device->dev_stats_valid = 1; 4743 device->dev_stats_valid = 1;
4701 btrfs_release_path(path); 4744 btrfs_release_path(path);
@@ -4747,8 +4790,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4747 BUG_ON(!path); 4790 BUG_ON(!path);
4748 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 4791 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
4749 if (ret < 0) { 4792 if (ret < 0) {
4750 printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", 4793 printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
4751 ret, device->name); 4794 ret, rcu_str_deref(device->name));
4752 goto out; 4795 goto out;
4753 } 4796 }
4754 4797
@@ -4757,8 +4800,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4757 /* need to delete old one and insert a new one */ 4800 /* need to delete old one and insert a new one */
4758 ret = btrfs_del_item(trans, dev_root, path); 4801 ret = btrfs_del_item(trans, dev_root, path);
4759 if (ret != 0) { 4802 if (ret != 0) {
4760 printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", 4803 printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
4761 device->name, ret); 4804 rcu_str_deref(device->name), ret);
4762 goto out; 4805 goto out;
4763 } 4806 }
4764 ret = 1; 4807 ret = 1;
@@ -4770,8 +4813,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
4770 ret = btrfs_insert_empty_item(trans, dev_root, path, 4813 ret = btrfs_insert_empty_item(trans, dev_root, path,
4771 &key, sizeof(*ptr)); 4814 &key, sizeof(*ptr));
4772 if (ret < 0) { 4815 if (ret < 0) {
4773 printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", 4816 printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
4774 device->name, ret); 4817 rcu_str_deref(device->name), ret);
4775 goto out; 4818 goto out;
4776 } 4819 }
4777 } 4820 }
@@ -4823,9 +4866,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4823{ 4866{
4824 if (!dev->dev_stats_valid) 4867 if (!dev->dev_stats_valid)
4825 return; 4868 return;
4826 printk_ratelimited(KERN_ERR 4869 printk_ratelimited_in_rcu(KERN_ERR
4827 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4870 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4828 dev->name, 4871 rcu_str_deref(dev->name),
4829 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4872 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4830 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4873 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4831 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4874 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
@@ -4837,8 +4880,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
4837 4880
4838static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) 4881static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
4839{ 4882{
4840 printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 4883 printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
4841 dev->name, 4884 rcu_str_deref(dev->name),
4842 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 4885 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
4843 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 4886 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
4844 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 4887 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3406a88ca83e..95f6637614db 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -58,7 +58,7 @@ struct btrfs_device {
58 /* the mode sent to blkdev_get */ 58 /* the mode sent to blkdev_get */
59 fmode_t mode; 59 fmode_t mode;
60 60
61 char *name; 61 struct rcu_string *name;
62 62
63 /* the internal btrfs device id */ 63 /* the internal btrfs device id */
64 u64 devid; 64 u64 devid;
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
281int btrfs_init_new_device(struct btrfs_root *root, char *path); 281int btrfs_init_new_device(struct btrfs_root *root, char *path);
282int btrfs_balance(struct btrfs_balance_control *bctl, 282int btrfs_balance(struct btrfs_balance_control *bctl,
283 struct btrfs_ioctl_balance_args *bargs); 283 struct btrfs_ioctl_balance_args *bargs);
284int btrfs_recover_balance(struct btrfs_root *tree_root); 284int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
285int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
285int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 286int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
286int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 287int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
287int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 288int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
diff --git a/fs/buffer.c b/fs/buffer.c
index 838a9cf246bd..c7062c896d7c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1036,6 +1036,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1036static struct buffer_head * 1036static struct buffer_head *
1037__getblk_slow(struct block_device *bdev, sector_t block, int size) 1037__getblk_slow(struct block_device *bdev, sector_t block, int size)
1038{ 1038{
1039 int ret;
1040 struct buffer_head *bh;
1041
1039 /* Size must be multiple of hard sectorsize */ 1042 /* Size must be multiple of hard sectorsize */
1040 if (unlikely(size & (bdev_logical_block_size(bdev)-1) || 1043 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1041 (size < 512 || size > PAGE_SIZE))) { 1044 (size < 512 || size > PAGE_SIZE))) {
@@ -1048,20 +1051,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1048 return NULL; 1051 return NULL;
1049 } 1052 }
1050 1053
1051 for (;;) { 1054retry:
1052 struct buffer_head * bh; 1055 bh = __find_get_block(bdev, block, size);
1053 int ret; 1056 if (bh)
1057 return bh;
1054 1058
1059 ret = grow_buffers(bdev, block, size);
1060 if (ret == 0) {
1061 free_more_memory();
1062 goto retry;
1063 } else if (ret > 0) {
1055 bh = __find_get_block(bdev, block, size); 1064 bh = __find_get_block(bdev, block, size);
1056 if (bh) 1065 if (bh)
1057 return bh; 1066 return bh;
1058
1059 ret = grow_buffers(bdev, block, size);
1060 if (ret < 0)
1061 return NULL;
1062 if (ret == 0)
1063 free_more_memory();
1064 } 1067 }
1068 return NULL;
1065} 1069}
1066 1070
1067/* 1071/*
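
The rewrite turns the open-coded for(;;) into a labelled retry: look up the block, try to grow the buffer cache when it is missing, and on zero progress free some memory and retry; a positive grow result earns exactly one more lookup. Restated with hypothetical helper names standing in for __find_get_block()/grow_buffers()/free_more_memory():

struct buffer_head;

/* Hypothetical stand-ins for the fs/buffer.c helpers named above. */
struct buffer_head *find_block(unsigned long long block, int size);
int grow(unsigned long long block, int size);
void reclaim_some_memory(void);

static struct buffer_head *getblk_slow(unsigned long long block, int size)
{
	struct buffer_head *bh;
	int ret;

retry:
	bh = find_block(block, size);
	if (bh)
		return bh;		/* fast path: already cached */
	ret = grow(block, size);
	if (ret == 0) {
		reclaim_some_memory();	/* no progress: reclaim and retry */
		goto retry;
	}
	if (ret > 0)
		return find_block(block, size);	/* grown: one more lookup */
	return NULL;			/* hard error from grow */
}
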
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 173b1d22e59b..8b67304e4b80 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -54,7 +54,12 @@
54 (CONGESTION_ON_THRESH(congestion_kb) - \ 54 (CONGESTION_ON_THRESH(congestion_kb) - \
55 (CONGESTION_ON_THRESH(congestion_kb) >> 2)) 55 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
56 56
57 57static inline struct ceph_snap_context *page_snap_context(struct page *page)
58{
59 if (PagePrivate(page))
60 return (void *)page->private;
61 return NULL;
62}
58 63
59/* 64/*
60 * Dirty a page. Optimistically adjust accounting, on the assumption 65 * Dirty a page. Optimistically adjust accounting, on the assumption
@@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
142{ 147{
143 struct inode *inode; 148 struct inode *inode;
144 struct ceph_inode_info *ci; 149 struct ceph_inode_info *ci;
145 struct ceph_snap_context *snapc = (void *)page->private; 150 struct ceph_snap_context *snapc = page_snap_context(page);
146 151
147 BUG_ON(!PageLocked(page)); 152 BUG_ON(!PageLocked(page));
148 BUG_ON(!page->private);
149 BUG_ON(!PagePrivate(page)); 153 BUG_ON(!PagePrivate(page));
150 BUG_ON(!page->mapping); 154 BUG_ON(!page->mapping);
151 155
@@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)
182 struct inode *inode = page->mapping ? page->mapping->host : NULL; 186 struct inode *inode = page->mapping ? page->mapping->host : NULL;
183 dout("%p releasepage %p idx %lu\n", inode, page, page->index); 187 dout("%p releasepage %p idx %lu\n", inode, page, page->index);
184 WARN_ON(PageDirty(page)); 188 WARN_ON(PageDirty(page));
185 WARN_ON(page->private);
186 WARN_ON(PagePrivate(page)); 189 WARN_ON(PagePrivate(page));
187 return 0; 190 return 0;
188} 191}
@@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
443 osdc = &fsc->client->osdc; 446 osdc = &fsc->client->osdc;
444 447
445 /* verify this is a writeable snap context */ 448 /* verify this is a writeable snap context */
446 snapc = (void *)page->private; 449 snapc = page_snap_context(page);
447 if (snapc == NULL) { 450 if (snapc == NULL) {
448 dout("writepage %p page %p not dirty?\n", inode, page); 451 dout("writepage %p page %p not dirty?\n", inode, page);
449 goto out; 452 goto out;
@@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
451 oldest = get_oldest_context(inode, &snap_size); 454 oldest = get_oldest_context(inode, &snap_size);
452 if (snapc->seq > oldest->seq) { 455 if (snapc->seq > oldest->seq) {
453 dout("writepage %p page %p snapc %p not writeable - noop\n", 456 dout("writepage %p page %p snapc %p not writeable - noop\n",
454 inode, page, (void *)page->private); 457 inode, page, snapc);
455 /* we should only noop if called by kswapd */ 458 /* we should only noop if called by kswapd */
456 WARN_ON((current->flags & PF_MEMALLOC) == 0); 459 WARN_ON((current->flags & PF_MEMALLOC) == 0);
457 ceph_put_snap_context(oldest); 460 ceph_put_snap_context(oldest);
@@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req,
591 clear_bdi_congested(&fsc->backing_dev_info, 594 clear_bdi_congested(&fsc->backing_dev_info,
592 BLK_RW_ASYNC); 595 BLK_RW_ASYNC);
593 596
594 ceph_put_snap_context((void *)page->private); 597 ceph_put_snap_context(page_snap_context(page));
595 page->private = 0; 598 page->private = 0;
596 ClearPagePrivate(page); 599 ClearPagePrivate(page);
597 dout("unlocking %d %p\n", i, page); 600 dout("unlocking %d %p\n", i, page);
@@ -795,7 +798,7 @@ get_more_pages:
795 } 798 }
796 799
797 /* only if matching snap context */ 800 /* only if matching snap context */
798 pgsnapc = (void *)page->private; 801 pgsnapc = page_snap_context(page);
799 if (pgsnapc->seq > snapc->seq) { 802 if (pgsnapc->seq > snapc->seq) {
800 dout("page snapc %p %lld > oldest %p %lld\n", 803 dout("page snapc %p %lld > oldest %p %lld\n",
801 pgsnapc, pgsnapc->seq, snapc, snapc->seq); 804 pgsnapc, pgsnapc->seq, snapc, snapc->seq);
@@ -984,7 +987,7 @@ retry_locked:
984 BUG_ON(!ci->i_snap_realm); 987 BUG_ON(!ci->i_snap_realm);
985 down_read(&mdsc->snap_rwsem); 988 down_read(&mdsc->snap_rwsem);
986 BUG_ON(!ci->i_snap_realm->cached_context); 989 BUG_ON(!ci->i_snap_realm->cached_context);
987 snapc = (void *)page->private; 990 snapc = page_snap_context(page);
988 if (snapc && snapc != ci->i_head_snapc) { 991 if (snapc && snapc != ci->i_head_snapc) {
989 /* 992 /*
990 * this page is already dirty in another (older) snap 993 * this page is already dirty in another (older) snap
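The ceph changes above replace raw (void *)page->private casts at every call site with one page_snap_context() accessor that checks PagePrivate first. A self-contained userspace sketch of that accessor pattern, with illustrative types (page_like and snap_ctx stand in for struct page and ceph_snap_context):

#include <stdbool.h>
#include <stdio.h>

struct snap_ctx { long seq; };

struct page_like {
        bool has_private;            /* stand-in for PagePrivate() */
        unsigned long private_data;  /* stand-in for page->private */
};

static struct snap_ctx *page_snap_ctx(struct page_like *p)
{
        if (p->has_private)
                return (struct snap_ctx *)p->private_data;
        return NULL;    /* callers get NULL, never a stale cast */
}

int main(void)
{
        struct snap_ctx sc = { .seq = 7 };
        struct page_like pg = { true, (unsigned long)&sc };
        struct page_like bare = { false, 0 };

        printf("%ld\n", page_snap_ctx(&pg)->seq);      /* 7 */
        printf("%p\n", (void *)page_snap_ctx(&bare));  /* (nil) */
        return 0;
}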
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5b400730c213..4ee522b3f66f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,7 +86,31 @@ static struct {
86#endif /* CONFIG_CIFS_WEAK_PW_HASH */ 86#endif /* CONFIG_CIFS_WEAK_PW_HASH */
87#endif /* CIFS_POSIX */ 87#endif /* CIFS_POSIX */
88 88
89/* Forward declarations */ 89#ifdef CONFIG_HIGHMEM
90/*
91 * On arches that have high memory, kmap address space is limited. By
92 * serializing the kmap operations on those arches, we ensure that we don't
93 * end up with a bunch of threads in writeback with partially mapped page
94 * arrays, stuck waiting for kmap to come back. That situation prevents
95 * progress and can deadlock.
96 */
97static DEFINE_MUTEX(cifs_kmap_mutex);
98
99static inline void
100cifs_kmap_lock(void)
101{
102 mutex_lock(&cifs_kmap_mutex);
103}
104
105static inline void
106cifs_kmap_unlock(void)
107{
108 mutex_unlock(&cifs_kmap_mutex);
109}
110#else /* !CONFIG_HIGHMEM */
111#define cifs_kmap_lock() do { ; } while(0)
112#define cifs_kmap_unlock() do { ; } while(0)
113#endif /* CONFIG_HIGHMEM */
90 114
91/* Mark as invalid, all open files on tree connections since they 115/* Mark as invalid, all open files on tree connections since they
92 were closed when session to server was lost */ 116 were closed when session to server was lost */
@@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1503 } 1527 }
1504 1528
1505 /* marshal up the page array */ 1529 /* marshal up the page array */
1530 cifs_kmap_lock();
1506 len = rdata->marshal_iov(rdata, data_len); 1531 len = rdata->marshal_iov(rdata, data_len);
1532 cifs_kmap_unlock();
1507 data_len -= len; 1533 data_len -= len;
1508 1534
1509 /* issue the read if we have any iovecs left to fill */ 1535 /* issue the read if we have any iovecs left to fill */
@@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata)
2069 * and set the iov_len properly for each one. It may also set 2095 * and set the iov_len properly for each one. It may also set
2070 * wdata->bytes too. 2096 * wdata->bytes too.
2071 */ 2097 */
2098 cifs_kmap_lock();
2072 wdata->marshal_iov(iov, wdata); 2099 wdata->marshal_iov(iov, wdata);
2100 cifs_kmap_unlock();
2073 2101
2074 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); 2102 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
2075 2103
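The cifs hunks above serialize page-array marshalling behind a mutex that exists only on CONFIG_HIGHMEM builds, where kmap address space is scarce; elsewhere the lock compiles away to no-ops. A userspace pthread analogue of that conditional-serialization pattern, with NEED_MAP_SERIALIZATION and marshal_pages() as illustrative stand-ins:

#include <pthread.h>
#include <stdio.h>

#define NEED_MAP_SERIALIZATION 1   /* analogue of CONFIG_HIGHMEM */

#if NEED_MAP_SERIALIZATION
static pthread_mutex_t map_mutex = PTHREAD_MUTEX_INITIALIZER;
#define map_lock()   pthread_mutex_lock(&map_mutex)
#define map_unlock() pthread_mutex_unlock(&map_mutex)
#else  /* plenty of address space: compile the locking away */
#define map_lock()   do { } while (0)
#define map_unlock() do { } while (0)
#endif

static void marshal_pages(void)
{
        puts("mapping a whole page array"); /* scarce-resource phase */
}

int main(void)
{
        map_lock();     /* one thread maps a full array at a time,
                           so none gets stuck half-mapped */
        marshal_pages();
        map_unlock();
        return 0;
}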
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 78db68a5cf44..94b7788c3189 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1653,24 +1653,26 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1653 * If yes, we have encountered a double deliminator 1653 * If yes, we have encountered a double deliminator
1654 * reset the NULL character to the deliminator 1654 * reset the NULL character to the deliminator
1655 */ 1655 */
1656 if (tmp_end < end && tmp_end[1] == delim) 1656 if (tmp_end < end && tmp_end[1] == delim) {
1657 tmp_end[0] = delim; 1657 tmp_end[0] = delim;
1658 1658
1659 /* Keep iterating until we get to a single deliminator 1659 /* Keep iterating until we get to a single
1660 * OR the end 1660 * deliminator OR the end
1661 */ 1661 */
1662 while ((tmp_end = strchr(tmp_end, delim)) != NULL && 1662 while ((tmp_end = strchr(tmp_end, delim))
1663 (tmp_end[1] == delim)) { 1663 != NULL && (tmp_end[1] == delim)) {
1664 tmp_end = (char *) &tmp_end[2]; 1664 tmp_end = (char *) &tmp_end[2];
1665 } 1665 }
1666 1666
1667 /* Reset var options to point to next element */ 1667 /* Reset var options to point to next element */
1668 if (tmp_end) { 1668 if (tmp_end) {
1669 tmp_end[0] = '\0'; 1669 tmp_end[0] = '\0';
1670 options = (char *) &tmp_end[1]; 1670 options = (char *) &tmp_end[1];
1671 } else 1671 } else
1672 /* Reached the end of the mount option string */ 1672 /* Reached the end of the mount option
1673 options = end; 1673 * string */
1674 options = end;
1675 }
1674 1676
1675 /* Now build new password string */ 1677 /* Now build new password string */
1676 temp_len = strlen(value); 1678 temp_len = strlen(value);
@@ -3443,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
3443#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) 3445#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
3444#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) 3446#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
3445 3447
3448/*
3449 * On hosts with high memory, we can't currently support wsize/rsize that are
3450 * larger than we can kmap at once. Cap the rsize/wsize at
3451 * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
3452 * larger than that anyway.
3453 */
3454#ifdef CONFIG_HIGHMEM
3455#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE)
3456#else /* CONFIG_HIGHMEM */
3457#define CIFS_KMAP_SIZE_LIMIT (1<<24)
3458#endif /* CONFIG_HIGHMEM */
3459
3446static unsigned int 3460static unsigned int
3447cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) 3461cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3448{ 3462{
@@ -3473,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3473 wsize = min_t(unsigned int, wsize, 3487 wsize = min_t(unsigned int, wsize,
3474 server->maxBuf - sizeof(WRITE_REQ) + 4); 3488 server->maxBuf - sizeof(WRITE_REQ) + 4);
3475 3489
3490 /* limit to the amount that we can kmap at once */
3491 wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
3492
3476 /* hard limit of CIFS_MAX_WSIZE */ 3493 /* hard limit of CIFS_MAX_WSIZE */
3477 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); 3494 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
3478 3495
@@ -3493,18 +3510,15 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3493 * MS-CIFS indicates that servers are only limited by the client's 3510 * MS-CIFS indicates that servers are only limited by the client's
3494 * bufsize for reads, testing against win98se shows that it throws 3511 * bufsize for reads, testing against win98se shows that it throws
3495 * INVALID_PARAMETER errors if you try to request too large a read. 3512 * INVALID_PARAMETER errors if you try to request too large a read.
3513 * OS/2 just sends back short reads.
3496 * 3514 *
3497 * If the server advertises a MaxBufferSize of less than one page, 3515 * If the server doesn't advertise CAP_LARGE_READ_X, then assume that
3498 * assume that it also can't satisfy reads larger than that either. 3516 * it can't handle a read request larger than its MaxBufferSize either.
3499 *
3500 * FIXME: Is there a better heuristic for this?
3501 */ 3517 */
3502 if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) 3518 if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP))
3503 defsize = CIFS_DEFAULT_IOSIZE; 3519 defsize = CIFS_DEFAULT_IOSIZE;
3504 else if (server->capabilities & CAP_LARGE_READ_X) 3520 else if (server->capabilities & CAP_LARGE_READ_X)
3505 defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; 3521 defsize = CIFS_DEFAULT_NON_POSIX_RSIZE;
3506 else if (server->maxBuf >= PAGE_CACHE_SIZE)
3507 defsize = CIFSMaxBufSize;
3508 else 3522 else
3509 defsize = server->maxBuf - sizeof(READ_RSP); 3523 defsize = server->maxBuf - sizeof(READ_RSP);
3510 3524
@@ -3517,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3517 if (!(server->capabilities & CAP_LARGE_READ_X)) 3531 if (!(server->capabilities & CAP_LARGE_READ_X))
3518 rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); 3532 rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
3519 3533
3534 /* limit to the amount that we can kmap at once */
3535 rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
3536
3520 /* hard limit of CIFS_MAX_RSIZE */ 3537 /* hard limit of CIFS_MAX_RSIZE */
3521 rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); 3538 rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
3522 3539
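The negotiation changes above boil down to one more clamp: the kmap limit becomes another min_t() step between the server's advertised maximum and the protocol's hard ceiling, and the order of the clamps doesn't matter. A runnable sketch of that shape, with illustrative constants:

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
        return a < b ? a : b;
}

int main(void)
{
        unsigned int wsize = 1 << 20;        /* requested/default */
        unsigned int server_max = 60 * 1024; /* server's buffer */
        unsigned int kmap_limit = 1 << 24;   /* mappable at once */
        unsigned int hard_max = 1 << 17;     /* protocol ceiling */

        wsize = min_u(wsize, server_max);    /* each limit is just */
        wsize = min_u(wsize, kmap_limit);    /* one more clamp */
        wsize = min_u(wsize, hard_max);
        printf("negotiated wsize = %u\n", wsize);
        return 0;
}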
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0a8224d1c4c5..a4217f02fab2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
86 86
87 dentry = d_lookup(parent, name); 87 dentry = d_lookup(parent, name);
88 if (dentry) { 88 if (dentry) {
89 /* FIXME: check for inode number changes? */ 89 inode = dentry->d_inode;
90 if (dentry->d_inode != NULL) 90 /* update inode in place if i_ino didn't change */
91 if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
92 cifs_fattr_to_inode(inode, fattr);
91 return dentry; 93 return dentry;
94 }
92 d_drop(dentry); 95 d_drop(dentry);
93 dput(dentry); 96 dput(dentry);
94 } 97 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3097ee58fd7d..f25d4ea14be4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
365 if (mid == NULL) 365 if (mid == NULL)
366 return -ENOMEM; 366 return -ENOMEM;
367 367
368 /* put it on the pending_mid_q */
369 spin_lock(&GlobalMid_Lock);
370 list_add_tail(&mid->qhead, &server->pending_mid_q);
371 spin_unlock(&GlobalMid_Lock);
372
373 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number); 368 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
374 if (rc) 369 if (rc) {
375 delete_mid(mid); 370 DeleteMidQEntry(mid);
371 return rc;
372 }
373
376 *ret_mid = mid; 374 *ret_mid = mid;
377 return rc; 375 return 0;
378} 376}
379 377
380/* 378/*
@@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
407 mid->callback_data = cbdata; 405 mid->callback_data = cbdata;
408 mid->mid_state = MID_REQUEST_SUBMITTED; 406 mid->mid_state = MID_REQUEST_SUBMITTED;
409 407
408 /* put it on the pending_mid_q */
409 spin_lock(&GlobalMid_Lock);
410 list_add_tail(&mid->qhead, &server->pending_mid_q);
411 spin_unlock(&GlobalMid_Lock);
412
413
410 cifs_in_send_inc(server); 414 cifs_in_send_inc(server);
411 rc = smb_sendv(server, iov, nvec); 415 rc = smb_sendv(server, iov, nvec);
412 cifs_in_send_dec(server); 416 cifs_in_send_dec(server);
413 cifs_save_when_sent(mid); 417 cifs_save_when_sent(mid);
414 mutex_unlock(&server->srv_mutex); 418 mutex_unlock(&server->srv_mutex);
415 419
416 if (rc) 420 if (rc == 0)
417 goto out_err; 421 return 0;
418 422
419 return rc;
420out_err:
421 delete_mid(mid); 423 delete_mid(mid);
422 add_credits(server, 1); 424 add_credits(server, 1);
423 wake_up(&server->request_q); 425 wake_up(&server->request_q);
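The transport.c change above delays list_add_tail() until after the mid is signed, so a thread scanning pending_mid_q cannot find an entry whose sequence number is not set yet. A userspace analogue of that initialize-then-publish ordering, with illustrative names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
        int seq;                /* must be valid before publication */
        struct entry *next;
};

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *pending_q;

/* initialize first: the entry is still private to this thread */
static struct entry *setup_entry(int seq)
{
        struct entry *e = malloc(sizeof(*e));
        if (e)
                e->seq = seq;
        return e;
}

/* publish second: only a fully built entry goes on the shared list */
static void publish_entry(struct entry *e)
{
        pthread_mutex_lock(&q_lock);
        e->next = pending_q;
        pending_q = e;
        pthread_mutex_unlock(&q_lock);
}

int main(void)
{
        struct entry *e = setup_entry(42);
        if (!e)
                return 1;
        publish_entry(e);
        printf("queued seq=%d\n", pending_q->seq);
        return 0;
}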
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 69f994a7d524..0dbe58a8b172 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
149 (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); 149 (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
150 if (!IS_ERR(*lower_file)) 150 if (!IS_ERR(*lower_file))
151 goto out; 151 goto out;
152 if (flags & O_RDONLY) { 152 if ((flags & O_ACCMODE) == O_RDONLY) {
153 rc = PTR_ERR((*lower_file)); 153 rc = PTR_ERR((*lower_file));
154 goto out; 154 goto out;
155 } 155 }
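The one-line ecryptfs fix above works because O_RDONLY is the value 0, so flags & O_RDONLY can never be true; the access mode is a two-bit field that must be masked with O_ACCMODE and compared, not tested as a flag bit. A runnable demonstration:

#include <fcntl.h>
#include <stdio.h>

int main(void)
{
        int flags = O_RDONLY;                    /* == 0 */

        printf("flags & O_RDONLY          -> %d\n",
               (flags & O_RDONLY) != 0);         /* always 0: bug */
        printf("(flags & O_ACCMODE) == RO -> %d\n",
               (flags & O_ACCMODE) == O_RDONLY); /* 1: correct */
        return 0;
}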
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 3a06f4043df4..c0038f6566d4 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
49 mutex_lock(&ecryptfs_daemon_hash_mux); 49 mutex_lock(&ecryptfs_daemon_hash_mux);
50 /* TODO: Just use file->private_data? */ 50 /* TODO: Just use file->private_data? */
51 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); 51 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
52 BUG_ON(rc || !daemon); 52 if (rc || !daemon) {
53 mutex_unlock(&ecryptfs_daemon_hash_mux);
54 return -EINVAL;
55 }
53 mutex_lock(&daemon->mux); 56 mutex_lock(&daemon->mux);
54 mutex_unlock(&ecryptfs_daemon_hash_mux); 57 mutex_unlock(&ecryptfs_daemon_hash_mux);
55 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { 58 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
122 goto out_unlock_daemon; 125 goto out_unlock_daemon;
123 } 126 }
124 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; 127 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
128 file->private_data = daemon;
125 atomic_inc(&ecryptfs_num_miscdev_opens); 129 atomic_inc(&ecryptfs_num_miscdev_opens);
126out_unlock_daemon: 130out_unlock_daemon:
127 mutex_unlock(&daemon->mux); 131 mutex_unlock(&daemon->mux);
@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
152 156
153 mutex_lock(&ecryptfs_daemon_hash_mux); 157 mutex_lock(&ecryptfs_daemon_hash_mux);
154 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); 158 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
155 BUG_ON(rc || !daemon); 159 if (rc || !daemon)
160 daemon = file->private_data;
156 mutex_lock(&daemon->mux); 161 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current));
158 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); 162 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
159 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; 163 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
160 atomic_dec(&ecryptfs_num_miscdev_opens); 164 atomic_dec(&ecryptfs_num_miscdev_opens);
@@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
191 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, 195 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
192 u16 msg_flags, struct ecryptfs_daemon *daemon) 196 u16 msg_flags, struct ecryptfs_daemon *daemon)
193{ 197{
194 int rc = 0; 198 struct ecryptfs_message *msg;
195 199
196 mutex_lock(&msg_ctx->mux); 200 msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
197 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), 201 if (!msg) {
198 GFP_KERNEL);
199 if (!msg_ctx->msg) {
200 rc = -ENOMEM;
201 printk(KERN_ERR "%s: Out of memory whilst attempting " 202 printk(KERN_ERR "%s: Out of memory whilst attempting "
202 "to kmalloc(%zd, GFP_KERNEL)\n", __func__, 203 "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
203 (sizeof(*msg_ctx->msg) + data_size)); 204 (sizeof(*msg) + data_size));
204 goto out_unlock; 205 return -ENOMEM;
205 } 206 }
207
208 mutex_lock(&msg_ctx->mux);
209 msg_ctx->msg = msg;
206 msg_ctx->msg->index = msg_ctx->index; 210 msg_ctx->msg->index = msg_ctx->index;
207 msg_ctx->msg->data_len = data_size; 211 msg_ctx->msg->data_len = data_size;
208 msg_ctx->type = msg_type; 212 msg_ctx->type = msg_type;
209 memcpy(msg_ctx->msg->data, data, data_size); 213 memcpy(msg_ctx->msg->data, data, data_size);
210 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); 214 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
211 mutex_lock(&daemon->mux);
212 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); 215 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
216 mutex_unlock(&msg_ctx->mux);
217
218 mutex_lock(&daemon->mux);
213 daemon->num_queued_msg_ctx++; 219 daemon->num_queued_msg_ctx++;
214 wake_up_interruptible(&daemon->wait); 220 wake_up_interruptible(&daemon->wait);
215 mutex_unlock(&daemon->mux); 221 mutex_unlock(&daemon->mux);
216out_unlock: 222
217 mutex_unlock(&msg_ctx->mux); 223 return 0;
218 return rc;
219} 224}
220 225
221/* 226/*
@@ -269,8 +274,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
269 mutex_lock(&ecryptfs_daemon_hash_mux); 274 mutex_lock(&ecryptfs_daemon_hash_mux);
270 /* TODO: Just use file->private_data? */ 275 /* TODO: Just use file->private_data? */
271 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); 276 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
272 BUG_ON(rc || !daemon); 277 if (rc || !daemon) {
278 mutex_unlock(&ecryptfs_daemon_hash_mux);
279 return -EINVAL;
280 }
273 mutex_lock(&daemon->mux); 281 mutex_lock(&daemon->mux);
282 if (task_pid(current) != daemon->pid) {
283 mutex_unlock(&daemon->mux);
284 mutex_unlock(&ecryptfs_daemon_hash_mux);
285 return -EPERM;
286 }
274 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { 287 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
275 rc = 0; 288 rc = 0;
276 mutex_unlock(&ecryptfs_daemon_hash_mux); 289 mutex_unlock(&ecryptfs_daemon_hash_mux);
@@ -307,9 +320,6 @@ check_list:
307 * message from the queue; try again */ 320 * message from the queue; try again */
308 goto check_list; 321 goto check_list;
309 } 322 }
310 BUG_ON(euid != daemon->euid);
311 BUG_ON(current_user_ns() != daemon->user_ns);
312 BUG_ON(task_pid(current) != daemon->pid);
313 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, 323 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
314 struct ecryptfs_msg_ctx, daemon_out_list); 324 struct ecryptfs_msg_ctx, daemon_out_list);
315 BUG_ON(!msg_ctx); 325 BUG_ON(!msg_ctx);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 74598f67efeb..1c8b55670804 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1710 goto error_tgt_fput; 1710 goto error_tgt_fput;
1711 1711
1712 /* Check if EPOLLWAKEUP is allowed */ 1712 /* Check if EPOLLWAKEUP is allowed */
1713 if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) 1713 if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
1714 epds.events &= ~EPOLLWAKEUP; 1714 epds.events &= ~EPOLLWAKEUP;
1715 1715
1716 /* 1716 /*
diff --git a/fs/exec.c b/fs/exec.c
index a79786a8d2c8..da27b91ff1e8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -819,10 +819,10 @@ static int exec_mmap(struct mm_struct *mm)
819 /* Notify parent that we're no longer interested in the old VM */ 819 /* Notify parent that we're no longer interested in the old VM */
820 tsk = current; 820 tsk = current;
821 old_mm = current->mm; 821 old_mm = current->mm;
822 sync_mm_rss(old_mm);
823 mm_release(tsk, old_mm); 822 mm_release(tsk, old_mm);
824 823
825 if (old_mm) { 824 if (old_mm) {
825 sync_mm_rss(old_mm);
826 /* 826 /*
827 * Make sure that if there is a core dump in progress 827 * Make sure that if there is a core dump in progress
828 * for the old mm, we get out and die instead of going 828 * for the old mm, we get out and die instead of going
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a2..24a49d47e935 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
735out: 735out:
736 ios->numdevs = devs_in_group; 736 ios->numdevs = devs_in_group;
737 ios->pages_consumed = cur_pg; 737 ios->pages_consumed = cur_pg;
738 if (unlikely(ret)) { 738 return ret;
739 if (length == ios->length)
740 return ret;
741 else
742 ios->length -= length;
743 }
744 return 0;
745} 739}
746 740
747int ore_create(struct ore_io_state *ios) 741int ore_create(struct ore_io_state *ios)
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1b..5f376d14fdcc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
144{ 144{
145 unsigned data_devs = sp2d->data_devs; 145 unsigned data_devs = sp2d->data_devs;
146 unsigned group_width = data_devs + sp2d->parity; 146 unsigned group_width = data_devs + sp2d->parity;
147 unsigned p; 147 int p, c;
148 148
149 if (!sp2d->needed) 149 if (!sp2d->needed)
150 return; 150 return;
151 151
152 for (p = 0; p < sp2d->pages_in_unit; p++) { 152 for (c = data_devs - 1; c >= 0; --c)
153 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 153 for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
154 154 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
155 if (_1ps->write_count < group_width) {
156 unsigned c;
157 155
158 for (c = 0; c < data_devs; c++) 156 if (_1ps->page_is_read[c]) {
159 if (_1ps->page_is_read[c]) { 157 struct page *page = _1ps->pages[c];
160 struct page *page = _1ps->pages[c];
161 158
162 r4w->put_page(priv, page); 159 r4w->put_page(priv, page);
163 _1ps->page_is_read[c] = false; 160 _1ps->page_is_read[c] = false;
164 } 161 }
165 } 162 }
166 163
164 for (p = 0; p < sp2d->pages_in_unit; p++) {
165 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
166
167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); 167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
168 _1ps->write_count = 0; 168 _1ps->write_count = 0;
169 _1ps->tx = NULL; 169 _1ps->tx = NULL;
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
461 * ios->sp2d[p][*], xor is calculated the same way. These pages are 461 * ios->sp2d[p][*], xor is calculated the same way. These pages are
462 * allocated/freed and don't go through cache 462 * allocated/freed and don't go through cache
463 */ 463 */
464static int _read_4_write(struct ore_io_state *ios) 464static int _read_4_write_first_stripe(struct ore_io_state *ios)
465{ 465{
466 struct ore_io_state *ios_read;
467 struct ore_striping_info read_si; 466 struct ore_striping_info read_si;
468 struct __stripe_pages_2d *sp2d = ios->sp2d; 467 struct __stripe_pages_2d *sp2d = ios->sp2d;
469 u64 offset = ios->si.first_stripe_start; 468 u64 offset = ios->si.first_stripe_start;
470 u64 last_stripe_end; 469 unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
471 unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
472 unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
473 int ret;
474 470
475 if (offset == ios->offset) /* Go to start collect $200 */ 471 if (offset == ios->offset) /* Go to start collect $200 */
476 goto read_last_stripe; 472 goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
478 min_p = _sp2d_min_pg(sp2d); 474 min_p = _sp2d_min_pg(sp2d);
479 max_p = _sp2d_max_pg(sp2d); 475 max_p = _sp2d_max_pg(sp2d);
480 476
477 ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
478 offset, ios->offset, min_p, max_p);
479
481 for (c = 0; ; c++) { 480 for (c = 0; ; c++) {
482 ore_calc_stripe_info(ios->layout, offset, 0, &read_si); 481 ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
483 read_si.obj_offset += min_p * PAGE_SIZE; 482 read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
512 } 511 }
513 512
514read_last_stripe: 513read_last_stripe:
514 return 0;
515}
516
517static int _read_4_write_last_stripe(struct ore_io_state *ios)
518{
519 struct ore_striping_info read_si;
520 struct __stripe_pages_2d *sp2d = ios->sp2d;
521 u64 offset;
522 u64 last_stripe_end;
523 unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
524 unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
525
515 offset = ios->offset + ios->length; 526 offset = ios->offset + ios->length;
516 if (offset % PAGE_SIZE) 527 if (offset % PAGE_SIZE)
517 _add_to_r4w_last_page(ios, &offset); 528 _add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
527 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 538 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
528 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); 539 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
529 540
530 BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
531 /* unaligned IO must be within a single stripe */
532
533 if (min_p == sp2d->pages_in_unit) { 541 if (min_p == sp2d->pages_in_unit) {
534 /* Didn't do it yet */ 542 /* Didn't do it yet */
535 min_p = _sp2d_min_pg(sp2d); 543 min_p = _sp2d_min_pg(sp2d);
536 max_p = _sp2d_max_pg(sp2d); 544 max_p = _sp2d_max_pg(sp2d);
537 } 545 }
538 546
547 ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
548 offset, last_stripe_end, min_p, max_p);
549
539 while (offset < last_stripe_end) { 550 while (offset < last_stripe_end) {
540 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 551 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
541 552
@@ -568,6 +579,15 @@ read_last_stripe:
568 } 579 }
569 580
570read_it: 581read_it:
582 return 0;
583}
584
585static int _read_4_write_execute(struct ore_io_state *ios)
586{
587 struct ore_io_state *ios_read;
588 unsigned i;
589 int ret;
590
571 ios_read = ios->ios_read_4_write; 591 ios_read = ios->ios_read_4_write;
572 if (!ios_read) 592 if (!ios_read)
573 return 0; 593 return 0;
@@ -591,6 +611,8 @@ read_it:
591 } 611 }
592 612
593 _mark_read4write_pages_uptodate(ios_read, ret); 613 _mark_read4write_pages_uptodate(ios_read, ret);
614 ore_put_io_state(ios_read);
615 ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
594 return 0; 616 return 0;
595} 617}
596 618
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
626 /* If first stripe, Read in all read4write pages 648 /* If first stripe, Read in all read4write pages
627 * (if needed) before we calculate the first parity. 649 * (if needed) before we calculate the first parity.
628 */ 650 */
629 _read_4_write(ios); 651 _read_4_write_first_stripe(ios);
630 } 652 }
653 if (!cur_len) /* If last stripe r4w pages of last stripe */
654 _read_4_write_last_stripe(ios);
655 _read_4_write_execute(ios);
631 656
632 for (i = 0; i < num_pages; i++) { 657 for (i = 0; i < num_pages; i++) {
633 pages[i] = _raid_page_alloc(); 658 pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
654 679
655int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) 680int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
656{ 681{
657 struct ore_layout *layout = ios->layout;
658
659 if (ios->parity_pages) { 682 if (ios->parity_pages) {
683 struct ore_layout *layout = ios->layout;
660 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; 684 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
661 unsigned stripe_size = ios->si.bytes_in_stripe;
662 u64 last_stripe, first_stripe;
663 685
664 if (_sp2d_alloc(pages_in_unit, layout->group_width, 686 if (_sp2d_alloc(pages_in_unit, layout->group_width,
665 layout->parity, &ios->sp2d)) { 687 layout->parity, &ios->sp2d)) {
666 return -ENOMEM; 688 return -ENOMEM;
667 } 689 }
668
669 /* Round io down to last full strip */
670 first_stripe = div_u64(ios->offset, stripe_size);
671 last_stripe = div_u64(ios->offset + ios->length, stripe_size);
672
673 /* If an IO spans more then a single stripe it must end at
674 * a stripe boundary. The reminder at the end is pushed into the
675 * next IO.
676 */
677 if (last_stripe != first_stripe) {
678 ios->length = last_stripe * stripe_size - ios->offset;
679
680 BUG_ON(!ios->length);
681 ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
682 PAGE_SIZE;
683 ios->si.length = ios->length; /*make it consistent */
684 }
685 } 690 }
686 return 0; 691 return 0;
687} 692}
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index e32bc919e4e3..5a7b691e748b 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -109,7 +109,7 @@ static struct kobj_type odev_ktype = {
109static struct kobj_type uuid_ktype = { 109static struct kobj_type uuid_ktype = {
110}; 110};
111 111
112void exofs_sysfs_dbg_print() 112void exofs_sysfs_dbg_print(void)
113{ 113{
114#ifdef CONFIG_EXOFS_DEBUG 114#ifdef CONFIG_EXOFS_DEBUG
115 struct kobject *k_name, *k_tmp; 115 struct kobject *k_name, *k_tmp;
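The exofs_sysfs_dbg_print(void) change above is more than style: in C, a definition written as f() has an unspecified argument list, while f(void) declares a true zero-argument function the compiler can type-check. A small demonstration (dbg_print is illustrative):

#include <stdio.h>

/* With (void), calls with arguments are rejected; with a bare (),
 * a call like dbg_print(1, 2) slips through unchecked on pre-C23
 * compilers. */
void dbg_print(void);

void dbg_print(void)
{
        puts("debug state");
}

int main(void)
{
        dbg_print();        /* dbg_print(1) would fail to compile */
        return 0;
}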
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f366..6ec6f9ee2fec 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -268,7 +268,6 @@ group_extend_out:
268 err = ext4_move_extents(filp, donor_filp, me.orig_start, 268 err = ext4_move_extents(filp, donor_filp, me.orig_start,
269 me.donor_start, me.len, &me.moved_len); 269 me.donor_start, me.len, &me.moved_len);
270 mnt_drop_write_file(filp); 270 mnt_drop_write_file(filp);
271 mnt_drop_write(filp->f_path.mnt);
272 271
273 if (copy_to_user((struct move_extent __user *)arg, 272 if (copy_to_user((struct move_extent __user *)arg,
274 &me, sizeof(me))) 273 &me, sizeof(me)))
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a3d81ebf6d86..0038b32cb362 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -738,22 +738,21 @@ static int
738fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) 738fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent)
739{ 739{
740 int len = *lenp; 740 int len = *lenp;
741 u32 ipos_h, ipos_m, ipos_l; 741 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
742 loff_t i_pos;
742 743
743 if (len < 5) { 744 if (len < 5) {
744 *lenp = 5; 745 *lenp = 5;
745 return 255; /* no room */ 746 return 255; /* no room */
746 } 747 }
747 748
748 ipos_h = MSDOS_I(inode)->i_pos >> 8; 749 i_pos = fat_i_pos_read(sbi, inode);
749 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
750 ipos_l = (MSDOS_I(inode)->i_pos & 0x0f) << 28;
751 *lenp = 5; 750 *lenp = 5;
752 fh[0] = inode->i_ino; 751 fh[0] = inode->i_ino;
753 fh[1] = inode->i_generation; 752 fh[1] = inode->i_generation;
754 fh[2] = ipos_h; 753 fh[2] = i_pos >> 8;
755 fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; 754 fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart;
756 fh[4] = ipos_l; 755 fh[4] = (i_pos & 0x0f) << 28;
757 if (parent) 756 if (parent)
758 fh[4] |= MSDOS_I(parent)->i_logstart; 757 fh[4] |= MSDOS_I(parent)->i_logstart;
759 return 3; 758 return 3;
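The fat_encode_fh() rewrite above reads i_pos once through the fat_i_pos_read() helper and then packs the same 40-bit position across the handle words, rather than re-reading MSDOS_I(inode)->i_pos three times. A runnable sketch of that packing, mirroring the shifts in the hunk with illustrative values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t i_pos = 0x12345678ABULL;  /* 40-bit position */
        uint32_t fh2, fh3, fh4, logstart = 0x00111111;

        fh2 = (uint32_t)(i_pos >> 8);             /* top 32 bits */
        fh3 = (uint32_t)((i_pos & 0xf0) << 24) | logstart;
        fh4 = (uint32_t)((i_pos & 0x0f) << 28);   /* low nibble */
        printf("%08x %08x %08x\n", fh2, fh3, fh4);
        return 0;
}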
diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e7..cf6f4345ceb0 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/pipe_fs_i.h> 15#include <linux/pipe_fs_i.h>
16 16
17static void wait_for_partner(struct inode* inode, unsigned int *cnt) 17static int wait_for_partner(struct inode* inode, unsigned int *cnt)
18{ 18{
19 int cur = *cnt; 19 int cur = *cnt;
20 20
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
23 if (signal_pending(current)) 23 if (signal_pending(current))
24 break; 24 break;
25 } 25 }
26 return cur == *cnt ? -ERESTARTSYS : 0;
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
67 * seen a writer */ 68 * seen a writer */
68 filp->f_version = pipe->w_counter; 69 filp->f_version = pipe->w_counter;
69 } else { 70 } else {
70 wait_for_partner(inode, &pipe->w_counter); 71 if (wait_for_partner(inode, &pipe->w_counter))
71 if(signal_pending(current))
72 goto err_rd; 72 goto err_rd;
73 } 73 }
74 } 74 }
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
90 wake_up_partner(inode); 90 wake_up_partner(inode);
91 91
92 if (!pipe->readers) { 92 if (!pipe->readers) {
93 wait_for_partner(inode, &pipe->r_counter); 93 if (wait_for_partner(inode, &pipe->r_counter))
94 if (signal_pending(current))
95 goto err_wr; 94 goto err_wr;
96 } 95 }
97 break; 96 break;
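The fifo change above makes wait_for_partner() report why it stopped, so both callers test a single return value instead of re-deriving the reason from signal_pending() after the fact. A runnable userspace sketch of that refactor, with interrupted() and wait_event() as illustrative stand-ins:

#include <stdbool.h>
#include <stdio.h>

static int signal_seen;            /* stand-in for a pending signal */
static bool interrupted(void) { return signal_seen; }
static void wait_event(int *ctr) { (*ctr)++; /* partner arrives */ }

static int wait_for_partner(int *counter)
{
        int seen = *counter;

        while (*counter == seen) {
                if (interrupted())
                        break;
                wait_event(counter);
        }
        /* report the outcome instead of making the caller re-check */
        return *counter == seen ? -1 /* -ERESTARTSYS */ : 0;
}

int main(void)
{
        int w_counter = 0;
        printf("wait_for_partner -> %d\n", wait_for_partner(&w_counter));
        return 0;
}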
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8d2fb8c88cf3..41a3ccff18d8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -664,6 +664,7 @@ static long writeback_sb_inodes(struct super_block *sb,
664 /* Wait for I_SYNC. This function drops i_lock... */ 664 /* Wait for I_SYNC. This function drops i_lock... */
665 inode_sleep_on_writeback(inode); 665 inode_sleep_on_writeback(inode);
666 /* Inode may be gone, start again */ 666 /* Inode may be gone, start again */
667 spin_lock(&wb->list_lock);
667 continue; 668 continue;
668 } 669 }
669 inode->i_state |= I_SYNC; 670 inode->i_state |= I_SYNC;
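The one-line writeback fix above restores a loop invariant: the loop body expects wb->list_lock held at the top, but the sleep path has dropped it, so the continue path must re-take the lock before looping. A userspace pthread analogue of that invariant (all helpers illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int work = 3;

static int more_work(void) { return work > 0; }

static int item_busy(void)
{
        static int first = 1;
        if (first) { first = 0; return 1; }
        return 0;
}

/* stand-in for the sleep path: returns with list_lock dropped */
static void sleep_on_item(void)
{
        pthread_mutex_unlock(&list_lock);
        /* ...sleep until the item is free... */
}

static void process_item(void) { work--; printf("wrote one\n"); }

int main(void)
{
        pthread_mutex_lock(&list_lock);
        while (more_work()) {           /* invariant: lock held here */
                if (item_busy()) {
                        sleep_on_item();                /* lock dropped */
                        pthread_mutex_lock(&list_lock); /* the fix */
                        continue;       /* invariant holds again */
                }
                process_item();
        }
        pthread_mutex_unlock(&list_lock);
        return 0;
}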
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index c640ba57074b..09addc8615fa 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -31,6 +31,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
31 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); 31 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
32 struct hfsplus_vh *vh = sbi->s_vhdr; 32 struct hfsplus_vh *vh = sbi->s_vhdr;
33 struct hfsplus_vh *bvh = sbi->s_backup_vhdr; 33 struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
34 u32 cnid = (unsigned long)dentry->d_fsdata;
34 35
35 if (!capable(CAP_SYS_ADMIN)) 36 if (!capable(CAP_SYS_ADMIN))
36 return -EPERM; 37 return -EPERM;
@@ -41,8 +42,12 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
41 vh->finder_info[0] = bvh->finder_info[0] = 42 vh->finder_info[0] = bvh->finder_info[0] =
42 cpu_to_be32(parent_ino(dentry)); 43 cpu_to_be32(parent_ino(dentry));
43 44
44 /* Bootloader */ 45 /*
45 vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino); 46 * Bootloader. Just using the inode here breaks in the case of
47 * hard links - the firmware wants the ID of the hard link file,
48 * but the inode points at the indirect inode
49 */
50 vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(cnid);
46 51
47 /* Per spec, the OS X system folder - same as finder_info[0] here */ 52 /* Per spec, the OS X system folder - same as finder_info[0] here */
48 vh->finder_info[5] = bvh->finder_info[5] = 53 vh->finder_info[5] = bvh->finder_info[5] =
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 7daf4b852d1c..90effcccca9a 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -56,7 +56,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
56 DECLARE_COMPLETION_ONSTACK(wait); 56 DECLARE_COMPLETION_ONSTACK(wait);
57 struct bio *bio; 57 struct bio *bio;
58 int ret = 0; 58 int ret = 0;
59 unsigned int io_size; 59 u64 io_size;
60 loff_t start; 60 loff_t start;
61 int offset; 61 int offset;
62 62
diff --git a/fs/locks.c b/fs/locks.c
index 814c51d0de47..fce6238d52c1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1465 case F_WRLCK: 1465 case F_WRLCK:
1466 return generic_add_lease(filp, arg, flp); 1466 return generic_add_lease(filp, arg, flp);
1467 default: 1467 default:
1468 BUG(); 1468 return -EINVAL;
1469 } 1469 }
1470} 1470}
1471EXPORT_SYMBOL(generic_setlease); 1471EXPORT_SYMBOL(generic_setlease);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 970659daa323..23ff18fe080a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,7 +17,6 @@
17#include <linux/kthread.h> 17#include <linux/kthread.h>
18#include <linux/sunrpc/svcauth_gss.h> 18#include <linux/sunrpc/svcauth_gss.h>
19#include <linux/sunrpc/bc_xprt.h> 19#include <linux/sunrpc/bc_xprt.h>
20#include <linux/nsproxy.h>
21 20
22#include <net/inet_sock.h> 21#include <net/inet_sock.h>
23 22
@@ -107,7 +106,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
107{ 106{
108 int ret; 107 int ret;
109 108
110 ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, 109 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
111 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 110 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
112 if (ret <= 0) 111 if (ret <= 0)
113 goto out_err; 112 goto out_err;
@@ -115,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
115 dprintk("NFS: Callback listener port = %u (af %u)\n", 114 dprintk("NFS: Callback listener port = %u (af %u)\n",
116 nfs_callback_tcpport, PF_INET); 115 nfs_callback_tcpport, PF_INET);
117 116
118 ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, 117 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
119 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 118 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
120 if (ret > 0) { 119 if (ret > 0) {
121 nfs_callback_tcpport6 = ret; 120 nfs_callback_tcpport6 = ret;
@@ -184,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
184 * fore channel connection. 183 * fore channel connection.
185 * Returns the input port (0) and sets the svc_serv bc_xprt on success 184 * Returns the input port (0) and sets the svc_serv bc_xprt on success
186 */ 185 */
187 ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, 186 ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
188 SVC_SOCK_ANONYMOUS); 187 SVC_SOCK_ANONYMOUS);
189 if (ret < 0) { 188 if (ret < 0) {
190 rqstp = ERR_PTR(ret); 189 rqstp = ERR_PTR(ret);
@@ -254,7 +253,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
254 char svc_name[12]; 253 char svc_name[12];
255 int ret = 0; 254 int ret = 0;
256 int minorversion_setup; 255 int minorversion_setup;
257 struct net *net = current->nsproxy->net_ns; 256 struct net *net = &init_net;
258 257
259 mutex_lock(&nfs_callback_mutex); 258 mutex_lock(&nfs_callback_mutex);
260 if (cb_info->users++ || cb_info->task != NULL) { 259 if (cb_info->users++ || cb_info->task != NULL) {
@@ -330,7 +329,7 @@ void nfs_callback_down(int minorversion)
330 cb_info->users--; 329 cb_info->users--;
331 if (cb_info->users == 0 && cb_info->task != NULL) { 330 if (cb_info->users == 0 && cb_info->task != NULL) {
332 kthread_stop(cb_info->task); 331 kthread_stop(cb_info->task);
333 svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns); 332 svc_shutdown_net(cb_info->serv, &init_net);
334 svc_exit_thread(cb_info->rqst); 333 svc_exit_thread(cb_info->rqst);
335 cb_info->serv = NULL; 334 cb_info->serv = NULL;
336 cb_info->rqst = NULL; 335 cb_info->rqst = NULL;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 95bfc243992c..e64b01d2a338 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -455,9 +455,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
455 args->csa_nrclists = ntohl(*p++); 455 args->csa_nrclists = ntohl(*p++);
456 args->csa_rclists = NULL; 456 args->csa_rclists = NULL;
457 if (args->csa_nrclists) { 457 if (args->csa_nrclists) {
458 args->csa_rclists = kmalloc(args->csa_nrclists * 458 args->csa_rclists = kmalloc_array(args->csa_nrclists,
459 sizeof(*args->csa_rclists), 459 sizeof(*args->csa_rclists),
460 GFP_KERNEL); 460 GFP_KERNEL);
461 if (unlikely(args->csa_rclists == NULL)) 461 if (unlikely(args->csa_rclists == NULL))
462 goto out; 462 goto out;
463 463
@@ -696,7 +696,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
696 const struct cb_sequenceres *res) 696 const struct cb_sequenceres *res)
697{ 697{
698 __be32 *p; 698 __be32 *p;
699 unsigned status = res->csr_status; 699 __be32 status = res->csr_status;
700 700
701 if (unlikely(status != 0)) 701 if (unlikely(status != 0))
702 goto out; 702 goto out;
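The switch to kmalloc_array() above guards a multiplication by an attacker-influenced count: csa_nrclists is decoded straight off the wire, and a plain n * size can wrap and yield a short buffer that later writes overrun. A userspace equivalent of that guard:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static void *alloc_array(size_t n, size_t size)
{
        if (size != 0 && n > SIZE_MAX / size)
                return NULL;            /* n * size would overflow */
        return malloc(n * size);
}

int main(void)
{
        /* hostile count: naive malloc(n * 16) wraps to a small
         * allocation; the guarded version refuses instead */
        size_t n = SIZE_MAX / 8;
        void *p = alloc_array(n, 16);

        printf("guarded alloc: %p\n", p);    /* (nil) */
        free(p);
        return 0;
}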
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7d108753af81..f005b5bebdc7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -207,7 +207,6 @@ error_0:
207static void nfs4_shutdown_session(struct nfs_client *clp) 207static void nfs4_shutdown_session(struct nfs_client *clp)
208{ 208{
209 if (nfs4_has_session(clp)) { 209 if (nfs4_has_session(clp)) {
210 nfs4_deviceid_purge_client(clp);
211 nfs4_destroy_session(clp->cl_session); 210 nfs4_destroy_session(clp->cl_session);
212 nfs4_destroy_clientid(clp); 211 nfs4_destroy_clientid(clp);
213 } 212 }
@@ -544,8 +543,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
544 543
545 smp_rmb(); 544 smp_rmb();
546 545
547 BUG_ON(clp->cl_cons_state != NFS_CS_READY);
548
549 dprintk("<-- %s found nfs_client %p for %s\n", 546 dprintk("<-- %s found nfs_client %p for %s\n",
550 __func__, clp, cl_init->hostname ?: ""); 547 __func__, clp, cl_init->hostname ?: "");
551 return clp; 548 return clp;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ad2775d3e219..48253372ab1d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -484,17 +484,22 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
484 484
485 list_for_each_entry_safe(req, tmp, &reqs, wb_list) { 485 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
486 if (!nfs_pageio_add_request(&desc, req)) { 486 if (!nfs_pageio_add_request(&desc, req)) {
487 nfs_list_remove_request(req);
487 nfs_list_add_request(req, &failed); 488 nfs_list_add_request(req, &failed);
488 spin_lock(cinfo.lock); 489 spin_lock(cinfo.lock);
489 dreq->flags = 0; 490 dreq->flags = 0;
490 dreq->error = -EIO; 491 dreq->error = -EIO;
491 spin_unlock(cinfo.lock); 492 spin_unlock(cinfo.lock);
492 } 493 }
494 nfs_release_request(req);
493 } 495 }
494 nfs_pageio_complete(&desc); 496 nfs_pageio_complete(&desc);
495 497
496 while (!list_empty(&failed)) 498 while (!list_empty(&failed)) {
499 req = nfs_list_entry(failed.next);
500 nfs_list_remove_request(req);
497 nfs_unlock_and_release_request(req); 501 nfs_unlock_and_release_request(req);
502 }
498 503
499 if (put_dreq(dreq)) 504 if (put_dreq(dreq))
500 nfs_direct_write_complete(dreq, dreq->inode); 505 nfs_direct_write_complete(dreq, dreq->inode);
@@ -523,9 +528,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
523 nfs_list_remove_request(req); 528 nfs_list_remove_request(req);
524 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { 529 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
525 /* Note the rewrite will go through mds */ 530 /* Note the rewrite will go through mds */
526 kref_get(&req->wb_kref);
527 nfs_mark_request_commit(req, NULL, &cinfo); 531 nfs_mark_request_commit(req, NULL, &cinfo);
528 } 532 } else
533 nfs_release_request(req);
529 nfs_unlock_and_release_request(req); 534 nfs_unlock_and_release_request(req);
530 } 535 }
531 536
@@ -716,12 +721,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
716 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) 721 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
717 bit = NFS_IOHDR_NEED_RESCHED; 722 bit = NFS_IOHDR_NEED_RESCHED;
718 else if (dreq->flags == 0) { 723 else if (dreq->flags == 0) {
719 memcpy(&dreq->verf, &req->wb_verf, 724 memcpy(&dreq->verf, hdr->verf,
720 sizeof(dreq->verf)); 725 sizeof(dreq->verf));
721 bit = NFS_IOHDR_NEED_COMMIT; 726 bit = NFS_IOHDR_NEED_COMMIT;
722 dreq->flags = NFS_ODIRECT_DO_COMMIT; 727 dreq->flags = NFS_ODIRECT_DO_COMMIT;
723 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { 728 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
724 if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { 729 if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
725 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 730 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
726 bit = NFS_IOHDR_NEED_RESCHED; 731 bit = NFS_IOHDR_NEED_RESCHED;
727 } else 732 } else
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b5b86a05059c..864c51e4b400 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,11 @@ unsigned int nfs_idmap_cache_timeout = 600;
57static const struct cred *id_resolver_cache; 57static const struct cred *id_resolver_cache;
58static struct key_type key_type_id_resolver_legacy; 58static struct key_type key_type_id_resolver_legacy;
59 59
60struct idmap {
61 struct rpc_pipe *idmap_pipe;
62 struct key_construction *idmap_key_cons;
63 struct mutex idmap_mutex;
64};
60 65
61/** 66/**
62 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields 67 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
@@ -310,9 +315,11 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
310 name, namelen, type, data, 315 name, namelen, type, data,
311 data_size, NULL); 316 data_size, NULL);
312 if (ret < 0) { 317 if (ret < 0) {
318 mutex_lock(&idmap->idmap_mutex);
313 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, 319 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
314 name, namelen, type, data, 320 name, namelen, type, data,
315 data_size, idmap); 321 data_size, idmap);
322 mutex_unlock(&idmap->idmap_mutex);
316 } 323 }
317 return ret; 324 return ret;
318} 325}
@@ -354,11 +361,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
354/* idmap classic begins here */ 361/* idmap classic begins here */
355module_param(nfs_idmap_cache_timeout, int, 0644); 362module_param(nfs_idmap_cache_timeout, int, 0644);
356 363
357struct idmap {
358 struct rpc_pipe *idmap_pipe;
359 struct key_construction *idmap_key_cons;
360};
361
362enum { 364enum {
363 Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err 365 Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
364}; 366};
@@ -469,6 +471,7 @@ nfs_idmap_new(struct nfs_client *clp)
469 return error; 471 return error;
470 } 472 }
471 idmap->idmap_pipe = pipe; 473 idmap->idmap_pipe = pipe;
474 mutex_init(&idmap->idmap_mutex);
472 475
473 clp->cl_idmap = idmap; 476 clp->cl_idmap = idmap;
474 return 0; 477 return 0;
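The idmap hunks above add a mutex held across the whole legacy upcall: struct idmap carries a single idmap_key_cons slot, and the lock keeps two concurrent requests from overwriting each other's context in it. A userspace pthread analogue of serializing access to a one-entry slot (names illustrative):

#include <pthread.h>
#include <stdio.h>

struct upcall_ctx { const char *name; };

struct idmap_like {
        pthread_mutex_t mutex;
        struct upcall_ctx *pending;   /* single shared slot */
};

static int do_upcall(struct idmap_like *im)
{
        printf("upcall for %s\n", im->pending->name);
        return 0;
}

/* hold the mutex across the whole upcall so a second request can
 * never clobber the one pending slot mid-flight */
static int legacy_request(struct idmap_like *im, struct upcall_ctx *ctx)
{
        int ret;

        pthread_mutex_lock(&im->mutex);
        im->pending = ctx;
        ret = do_upcall(im);
        im->pending = NULL;
        pthread_mutex_unlock(&im->mutex);
        return ret;
}

int main(void)
{
        struct idmap_like im = { PTHREAD_MUTEX_INITIALIZER, NULL };
        struct upcall_ctx ctx = { "uid:1000" };
        return legacy_request(&im, &ctx);
}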
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e605d695dbcb..f7296983eba6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1530,7 +1530,6 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1530 nfsi->delegation_state = 0; 1530 nfsi->delegation_state = 0;
1531 init_rwsem(&nfsi->rwsem); 1531 init_rwsem(&nfsi->rwsem);
1532 nfsi->layout = NULL; 1532 nfsi->layout = NULL;
1533 atomic_set(&nfsi->commit_info.rpcs_out, 0);
1534#endif 1533#endif
1535} 1534}
1536 1535
@@ -1545,6 +1544,7 @@ static void init_once(void *foo)
1545 INIT_LIST_HEAD(&nfsi->commit_info.list); 1544 INIT_LIST_HEAD(&nfsi->commit_info.list);
1546 nfsi->npages = 0; 1545 nfsi->npages = 0;
1547 nfsi->commit_info.ncommit = 0; 1546 nfsi->commit_info.ncommit = 0;
1547 atomic_set(&nfsi->commit_info.rpcs_out, 0);
1548 atomic_set(&nfsi->silly_count, 1); 1548 atomic_set(&nfsi->silly_count, 1);
1549 INIT_HLIST_HEAD(&nfsi->silly_list); 1549 INIT_HLIST_HEAD(&nfsi->silly_list);
1550 init_waitqueue_head(&nfsi->waitqueue); 1550 init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c6827f93ab57..cc5900ac61b5 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -295,7 +295,7 @@ is_ds_client(struct nfs_client *clp)
295 295
296extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; 296extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
297 297
298extern const u32 nfs4_fattr_bitmap[2]; 298extern const u32 nfs4_fattr_bitmap[3];
299extern const u32 nfs4_statfs_bitmap[2]; 299extern const u32 nfs4_statfs_bitmap[2];
300extern const u32 nfs4_pathconf_bitmap[2]; 300extern const u32 nfs4_pathconf_bitmap[2];
301extern const u32 nfs4_fsinfo_bitmap[3]; 301extern const u32 nfs4_fsinfo_bitmap[3];
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d48dbefa0e71..15fc7e4664ed 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -105,6 +105,8 @@ static int nfs4_map_errors(int err)
105 return -EINVAL; 105 return -EINVAL;
106 case -NFS4ERR_SHARE_DENIED: 106 case -NFS4ERR_SHARE_DENIED:
107 return -EACCES; 107 return -EACCES;
108 case -NFS4ERR_MINOR_VERS_MISMATCH:
109 return -EPROTONOSUPPORT;
108 default: 110 default:
109 dprintk("%s could not handle NFSv4 error %d\n", 111 dprintk("%s could not handle NFSv4 error %d\n",
110 __func__, -err); 112 __func__, -err);
@@ -116,7 +118,7 @@ static int nfs4_map_errors(int err)
116/* 118/*
117 * This is our standard bitmap for GETATTR requests. 119 * This is our standard bitmap for GETATTR requests.
118 */ 120 */
119const u32 nfs4_fattr_bitmap[2] = { 121const u32 nfs4_fattr_bitmap[3] = {
120 FATTR4_WORD0_TYPE 122 FATTR4_WORD0_TYPE
121 | FATTR4_WORD0_CHANGE 123 | FATTR4_WORD0_CHANGE
122 | FATTR4_WORD0_SIZE 124 | FATTR4_WORD0_SIZE
@@ -133,6 +135,24 @@ const u32 nfs4_fattr_bitmap[2] = {
133 | FATTR4_WORD1_TIME_MODIFY 135 | FATTR4_WORD1_TIME_MODIFY
134}; 136};
135 137
138static const u32 nfs4_pnfs_open_bitmap[3] = {
139 FATTR4_WORD0_TYPE
140 | FATTR4_WORD0_CHANGE
141 | FATTR4_WORD0_SIZE
142 | FATTR4_WORD0_FSID
143 | FATTR4_WORD0_FILEID,
144 FATTR4_WORD1_MODE
145 | FATTR4_WORD1_NUMLINKS
146 | FATTR4_WORD1_OWNER
147 | FATTR4_WORD1_OWNER_GROUP
148 | FATTR4_WORD1_RAWDEV
149 | FATTR4_WORD1_SPACE_USED
150 | FATTR4_WORD1_TIME_ACCESS
151 | FATTR4_WORD1_TIME_METADATA
152 | FATTR4_WORD1_TIME_MODIFY,
153 FATTR4_WORD2_MDSTHRESHOLD
154};
155
136const u32 nfs4_statfs_bitmap[2] = { 156const u32 nfs4_statfs_bitmap[2] = {
137 FATTR4_WORD0_FILES_AVAIL 157 FATTR4_WORD0_FILES_AVAIL
138 | FATTR4_WORD0_FILES_FREE 158 | FATTR4_WORD0_FILES_FREE
@@ -844,6 +864,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
844 p->o_arg.name = &dentry->d_name; 864 p->o_arg.name = &dentry->d_name;
845 p->o_arg.server = server; 865 p->o_arg.server = server;
846 p->o_arg.bitmask = server->attr_bitmask; 866 p->o_arg.bitmask = server->attr_bitmask;
867 p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
847 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 868 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
848 if (attrs != NULL && attrs->ia_valid != 0) { 869 if (attrs != NULL && attrs->ia_valid != 0) {
849 __be32 verf[2]; 870 __be32 verf[2];
@@ -1820,6 +1841,7 @@ static int _nfs4_do_open(struct inode *dir,
1820 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); 1841 opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
1821 if (!opendata->f_attr.mdsthreshold) 1842 if (!opendata->f_attr.mdsthreshold)
1822 goto err_opendata_put; 1843 goto err_opendata_put;
1844 opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
1823 } 1845 }
1824 if (dentry->d_inode != NULL) 1846 if (dentry->d_inode != NULL)
1825 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1847 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
@@ -1880,6 +1902,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
1880 struct nfs4_state *res; 1902 struct nfs4_state *res;
1881 int status; 1903 int status;
1882 1904
1905 fmode &= FMODE_READ|FMODE_WRITE;
1883 do { 1906 do {
1884 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, 1907 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
1885 &res, ctx_th); 1908 &res, ctx_th);
@@ -2526,6 +2549,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2526 2549
2527 nfs_fattr_init(fattr); 2550 nfs_fattr_init(fattr);
2528 2551
2552 /* Deal with open(O_TRUNC) */
2553 if (sattr->ia_valid & ATTR_OPEN)
2554 sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
2555
2556 /* Optimization: if the end result is no change, don't RPC */
2557 if ((sattr->ia_valid & ~(ATTR_FILE)) == 0)
2558 return 0;
2559
2529 /* Search for an existing open(O_WRITE) file */ 2560 /* Search for an existing open(O_WRITE) file */
2530 if (sattr->ia_valid & ATTR_FILE) { 2561 if (sattr->ia_valid & ATTR_FILE) {
2531 struct nfs_open_context *ctx; 2562 struct nfs_open_context *ctx;
@@ -2537,10 +2568,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2537 } 2568 }
2538 } 2569 }
2539 2570
2540 /* Deal with open(O_TRUNC) */
2541 if (sattr->ia_valid & ATTR_OPEN)
2542 sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
2543
2544 status = nfs4_do_setattr(inode, cred, fattr, sattr, state); 2571 status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
2545 if (status == 0) 2572 if (status == 0)
2546 nfs_setattr_update_inode(inode, sattr); 2573 nfs_setattr_update_inode(inode, sattr);
@@ -5275,7 +5302,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
5275 5302
5276 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5303 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5277 if (status) 5304 if (status)
5278 pr_warn("NFS: Got error %d from the server %s on " 5305 dprintk("NFS: Got error %d from the server %s on "
5279 "DESTROY_CLIENTID.", status, clp->cl_hostname); 5306 "DESTROY_CLIENTID.", status, clp->cl_hostname);
5280 return status; 5307 return status;
5281} 5308}
@@ -5746,8 +5773,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
5746 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5773 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5747 5774
5748 if (status) 5775 if (status)
5749 printk(KERN_WARNING 5776 dprintk("NFS: Got error %d from the server on DESTROY_SESSION. "
5750 "NFS: Got error %d from the server on DESTROY_SESSION. "
5751 "Session has been destroyed regardless...\n", status); 5777 "Session has been destroyed regardless...\n", status);
5752 5778
5753 dprintk("<-- nfs4_proc_destroy_session\n"); 5779 dprintk("<-- nfs4_proc_destroy_session\n");
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c679b9ecef63..f38300e9f171 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -244,6 +244,16 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
244 return nfs4_wait_on_slot_tbl(&ses->fc_slot_table); 244 return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
245} 245}
246 246
247static void nfs41_finish_session_reset(struct nfs_client *clp)
248{
249 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
250 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
251 /* create_session negotiated new slot table */
252 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
253 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
254 nfs41_setup_state_renewal(clp);
255}
256
247int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 257int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
248{ 258{
249 int status; 259 int status;
@@ -259,8 +269,7 @@ do_confirm:
259 status = nfs4_proc_create_session(clp, cred); 269 status = nfs4_proc_create_session(clp, cred);
260 if (status != 0) 270 if (status != 0)
261 goto out; 271 goto out;
262 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 272 nfs41_finish_session_reset(clp);
263 nfs41_setup_state_renewal(clp);
264 nfs_mark_client_ready(clp, NFS_CS_READY); 273 nfs_mark_client_ready(clp, NFS_CS_READY);
265out: 274out:
266 return status; 275 return status;
@@ -1772,16 +1781,9 @@ static int nfs4_reset_session(struct nfs_client *clp)
1772 status = nfs4_handle_reclaim_lease_error(clp, status); 1781 status = nfs4_handle_reclaim_lease_error(clp, status);
1773 goto out; 1782 goto out;
1774 } 1783 }
1775 clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1784 nfs41_finish_session_reset(clp);
1776 /* create_session negotiated new slot table */
1777 clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1778 clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1779 dprintk("%s: session reset was successful for server %s!\n", 1785 dprintk("%s: session reset was successful for server %s!\n",
1780 __func__, clp->cl_hostname); 1786 __func__, clp->cl_hostname);
1781
1782 /* Let the state manager reestablish state */
1783 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1784 nfs41_setup_state_renewal(clp);
1785out: 1787out:
1786 if (cred) 1788 if (cred)
1787 put_rpccred(cred); 1789 put_rpccred(cred);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ee4a74db95d0..18fae29b0301 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1198,12 +1198,13 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
1198} 1198}
1199 1199
1200static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, 1200static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
1201 const u32 *open_bitmap,
1201 struct compound_hdr *hdr) 1202 struct compound_hdr *hdr)
1202{ 1203{
1203 encode_getattr_three(xdr, 1204 encode_getattr_three(xdr,
1204 bitmask[0] & nfs4_fattr_bitmap[0], 1205 bitmask[0] & open_bitmap[0],
1205 bitmask[1] & nfs4_fattr_bitmap[1], 1206 bitmask[1] & open_bitmap[1],
1206 bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, 1207 bitmask[2] & open_bitmap[2],
1207 hdr); 1208 hdr);
1208} 1209}
1209 1210
@@ -2221,7 +2222,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2221 encode_putfh(xdr, args->fh, &hdr); 2222 encode_putfh(xdr, args->fh, &hdr);
2222 encode_open(xdr, args, &hdr); 2223 encode_open(xdr, args, &hdr);
2223 encode_getfh(xdr, &hdr); 2224 encode_getfh(xdr, &hdr);
2224 encode_getfattr_open(xdr, args->bitmask, &hdr); 2225 encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
2225 encode_nops(&hdr); 2226 encode_nops(&hdr);
2226} 2227}
2227 2228
@@ -4359,7 +4360,10 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
4359 4360
4360 if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) 4361 if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
4361 return -EIO; 4362 return -EIO;
4362 if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { 4363 if (bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD) {
4364 /* Did the server return an unrequested attribute? */
4365 if (unlikely(res == NULL))
4366 return -EREMOTEIO;
4363 p = xdr_inline_decode(xdr, 4); 4367 p = xdr_inline_decode(xdr, 4);
4364 if (unlikely(!p)) 4368 if (unlikely(!p))
4365 goto out_overflow; 4369 goto out_overflow;
@@ -4372,6 +4376,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
4372 __func__); 4376 __func__);
4373 4377
4374 status = decode_first_threshold_item4(xdr, res); 4378 status = decode_first_threshold_item4(xdr, res);
4379 bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD;
4375 } 4380 }
4376 return status; 4381 return status;
4377out_overflow: 4382out_overflow:
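
Review note: the decode_attr_mdsthreshold() hunks add two safeguards: reject an attribute the client never asked for, and clear the bitmap bit once the attribute is consumed so later decoders see a consistent bitmap. A small sketch of that bitmap-guarded decode pattern (userspace, invented names and bit value):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define ATTR_MDSTHRESHOLD (1u << 4)     /* illustrative bit */

static int decode_threshold(uint32_t *bitmap, const uint32_t *xdr,
                            uint32_t *res)
{
        if (!(*bitmap & ATTR_MDSTHRESHOLD))
                return 0;               /* attribute not present */
        if (res == NULL)
                return -EREMOTEIO;      /* present but never requested */

        *res = *xdr;                    /* "decode" one word */
        *bitmap &= ~ATTR_MDSTHRESHOLD;  /* mark it consumed */
        return 0;
}

int main(void)
{
        uint32_t bitmap = ATTR_MDSTHRESHOLD, wire = 42, out = 0;

        printf("rc=%d out=%u bitmap=%#x\n",
               decode_threshold(&bitmap, &wire, &out), out, bitmap);
        return 0;
}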
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index b47277baebab..f50d3e8d6f22 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
454 objios->ios->done = _read_done; 454 objios->ios->done = _read_done;
455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
456 rdata->args.offset, rdata->args.count); 456 rdata->args.offset, rdata->args.count);
457 return ore_read(objios->ios); 457 ret = ore_read(objios->ios);
458 if (unlikely(ret))
459 objio_free_result(&objios->oir);
460 return ret;
458} 461}
459 462
460/* 463/*
@@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
486 struct nfs_write_data *wdata = objios->oir.rpcdata; 489 struct nfs_write_data *wdata = objios->oir.rpcdata;
487 struct address_space *mapping = wdata->header->inode->i_mapping; 490 struct address_space *mapping = wdata->header->inode->i_mapping;
488 pgoff_t index = offset / PAGE_SIZE; 491 pgoff_t index = offset / PAGE_SIZE;
489 struct page *page = find_get_page(mapping, index); 492 struct page *page;
493 loff_t i_size = i_size_read(wdata->header->inode);
494
495 if (offset >= i_size) {
496 *uptodate = true;
497 dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
498 return ZERO_PAGE(0);
499 }
490 500
501 page = find_get_page(mapping, index);
491 if (!page) { 502 if (!page) {
492 page = find_or_create_page(mapping, index, GFP_NOFS); 503 page = find_or_create_page(mapping, index, GFP_NOFS);
493 if (unlikely(!page)) { 504 if (unlikely(!page)) {
@@ -507,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
507 518
508static void __r4w_put_page(void *priv, struct page *page) 519static void __r4w_put_page(void *priv, struct page *page)
509{ 520{
510 dprintk("%s: index=0x%lx\n", __func__, page->index); 521 dprintk("%s: index=0x%lx\n", __func__,
511 page_cache_release(page); 522 (page == ZERO_PAGE(0)) ? -1UL : page->index);
523 if (ZERO_PAGE(0) != page)
524 page_cache_release(page);
512 return; 525 return;
513} 526}
514 527
@@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
539 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 552 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
540 wdata->args.offset, wdata->args.count); 553 wdata->args.offset, wdata->args.count);
541 ret = ore_write(objios->ios); 554 ret = ore_write(objios->ios);
542 if (unlikely(ret)) 555 if (unlikely(ret)) {
556 objio_free_result(&objios->oir);
543 return ret; 557 return ret;
558 }
544 559
545 if (objios->sync) 560 if (objios->sync)
546 _write_done(objios->ios, objios); 561 _write_done(objios->ios, objios);
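
Review note: __r4w_get_page() now hands back ZERO_PAGE(0) for offsets at or past i_size, and __r4w_put_page() must recognize that shared page and skip the refcount drop. A userspace sketch of the same convention (a never-freed singleton zero buffer; names invented):

#include <stdio.h>
#include <stdlib.h>

static char zero_page[4096];            /* shared, never freed */

static char *get_page_for(size_t offset, size_t i_size)
{
        if (offset >= i_size)
                return zero_page;       /* read past EOF: all zeroes */
        return calloc(1, 4096);         /* stand-in for find_get_page() */
}

static void put_page_for(char *page)
{
        if (page != zero_page)          /* never free the shared page */
                free(page);
}

int main(void)
{
        char *p = get_page_for(8192, 4096);     /* beyond "i_size" */

        printf("zero page? %s\n", p == zero_page ? "yes" : "no");
        put_page_for(p);
        return 0;
}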
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b8323aa7b543..bbc49caa7a82 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -70,6 +70,10 @@ find_pnfs_driver(u32 id)
70 70
71 spin_lock(&pnfs_spinlock); 71 spin_lock(&pnfs_spinlock);
72 local = find_pnfs_driver_locked(id); 72 local = find_pnfs_driver_locked(id);
73 if (local != NULL && !try_module_get(local->owner)) {
74 dprintk("%s: Could not grab reference on module\n", __func__);
75 local = NULL;
76 }
73 spin_unlock(&pnfs_spinlock); 77 spin_unlock(&pnfs_spinlock);
74 return local; 78 return local;
75} 79}
@@ -80,6 +84,9 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
80 if (nfss->pnfs_curr_ld) { 84 if (nfss->pnfs_curr_ld) {
81 if (nfss->pnfs_curr_ld->clear_layoutdriver) 85 if (nfss->pnfs_curr_ld->clear_layoutdriver)
82 nfss->pnfs_curr_ld->clear_layoutdriver(nfss); 86 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
87 /* Decrement the MDS count. Purge the deviceid cache if zero */
88 if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
89 nfs4_deviceid_purge_client(nfss->nfs_client);
83 module_put(nfss->pnfs_curr_ld->owner); 90 module_put(nfss->pnfs_curr_ld->owner);
84 } 91 }
85 nfss->pnfs_curr_ld = NULL; 92 nfss->pnfs_curr_ld = NULL;
@@ -115,10 +122,6 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
115 goto out_no_driver; 122 goto out_no_driver;
116 } 123 }
117 } 124 }
118 if (!try_module_get(ld_type->owner)) {
119 dprintk("%s: Could not grab reference on module\n", __func__);
120 goto out_no_driver;
121 }
122 server->pnfs_curr_ld = ld_type; 125 server->pnfs_curr_ld = ld_type;
123 if (ld_type->set_layoutdriver 126 if (ld_type->set_layoutdriver
124 && ld_type->set_layoutdriver(server, mntfh)) { 127 && ld_type->set_layoutdriver(server, mntfh)) {
@@ -127,6 +130,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
127 module_put(ld_type->owner); 130 module_put(ld_type->owner);
128 goto out_no_driver; 131 goto out_no_driver;
129 } 132 }
133 /* Bump the MDS count */
134 atomic_inc(&server->nfs_client->cl_mds_count);
130 135
131 dprintk("%s: pNFS module for %u set\n", __func__, id); 136 dprintk("%s: pNFS module for %u set\n", __func__, id);
132 return; 137 return;
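
Review note: moving try_module_get() under pnfs_spinlock closes a window where the layout driver could be unregistered between lookup and reference grab; the set_pnfs_layoutdriver() copy of the get is dropped since find_pnfs_driver() now returns with the reference held. A pthread sketch of the take-the-reference-under-the-lookup-lock pattern (invented names):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static int driver_refcount;             /* 0 == may be unloaded */
static int driver_registered = 1;

static int find_and_get_driver(void)
{
        int found = 0;

        pthread_mutex_lock(&registry_lock);
        if (driver_registered) {        /* lookup ... */
                driver_refcount++;      /* ... and get, one critical section */
                found = 1;
        }
        pthread_mutex_unlock(&registry_lock);
        return found;
}

int main(void)
{
        printf("got driver: %d, refs: %d\n", find_and_get_driver(),
               driver_refcount);
        return 0;
}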
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 29fd23c0efdc..64f90d845f6a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -365,7 +365,7 @@ static inline bool
365pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src, 365pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
366 struct nfs_server *nfss) 366 struct nfs_server *nfss)
367{ 367{
368 return (dst && src && src->bm != 0 && 368 return (dst && src && src->bm != 0 && nfss->pnfs_curr_ld &&
369 nfss->pnfs_curr_ld->id == src->l_type); 369 nfss->pnfs_curr_ld->id == src->l_type);
370} 370}
371 371
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index a706b6bcc286..617c7419a08e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -651,7 +651,7 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
651 /* Emulate the eof flag, which isn't normally needed in NFSv2 651 /* Emulate the eof flag, which isn't normally needed in NFSv2
652 * as it is guaranteed to always return the file attributes 652 * as it is guaranteed to always return the file attributes
653 */ 653 */
654 if (data->args.offset + data->args.count >= data->res.fattr->size) 654 if (data->args.offset + data->res.count >= data->res.fattr->size)
655 data->res.eof = 1; 655 data->res.eof = 1;
656 } 656 }
657 return 0; 657 return 0;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ff656c022684..06228192f64e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options,
1867 if (data == NULL) 1867 if (data == NULL)
1868 goto out_no_data; 1868 goto out_no_data;
1869 1869
1870 args->version = NFS_DEFAULT_VERSION;
1870 switch (data->version) { 1871 switch (data->version) {
1871 case 1: 1872 case 1:
1872 data->namlen = 0; 1873 data->namlen = 0;
@@ -2637,6 +2638,8 @@ static int nfs4_validate_mount_data(void *options,
2637 if (data == NULL) 2638 if (data == NULL)
2638 goto out_no_data; 2639 goto out_no_data;
2639 2640
2641 args->version = 4;
2642
2640 switch (data->version) { 2643 switch (data->version) {
2641 case 1: 2644 case 1:
2642 if (data->host_addrlen > sizeof(args->nfs_server.address)) 2645 if (data->host_addrlen > sizeof(args->nfs_server.address))
@@ -2857,6 +2860,8 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2857 2860
2858 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2861 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2859 2862
2863 mount_info->fill_super = nfs4_fill_super;
2864
2860 export_path = data->nfs_server.export_path; 2865 export_path = data->nfs_server.export_path;
2861 data->nfs_server.export_path = "/"; 2866 data->nfs_server.export_path = "/";
2862 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, 2867 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e6fe3d69d14c..4d6861c0dc14 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
80 INIT_LIST_HEAD(&hdr->rpc_list); 80 INIT_LIST_HEAD(&hdr->rpc_list);
81 spin_lock_init(&hdr->lock); 81 spin_lock_init(&hdr->lock);
82 atomic_set(&hdr->refcnt, 0); 82 atomic_set(&hdr->refcnt, 0);
83 hdr->verf = &p->verf;
83 } 84 }
84 return p; 85 return p;
85} 86}
@@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
619 goto next; 620 goto next;
620 } 621 }
621 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { 622 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
623 memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf));
622 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 624 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
623 goto next; 625 goto next;
624 } 626 }
@@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata)
1255 struct nfs_write_data *data = calldata; 1257 struct nfs_write_data *data = calldata;
1256 struct nfs_pgio_header *hdr = data->header; 1258 struct nfs_pgio_header *hdr = data->header;
1257 int status = data->task.tk_status; 1259 int status = data->task.tk_status;
1258 struct nfs_page *req = hdr->req;
1259 1260
1260 if ((status >= 0) && nfs_write_need_commit(data)) { 1261 if ((status >= 0) && nfs_write_need_commit(data)) {
1261 spin_lock(&hdr->lock); 1262 spin_lock(&hdr->lock);
1262 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) 1263 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1263 ; /* Do nothing */ 1264 ; /* Do nothing */
1264 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) 1265 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1265 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1266 memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
1266 else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) 1267 else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
1267 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); 1268 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1268 spin_unlock(&hdr->lock); 1269 spin_unlock(&hdr->lock);
1269 } 1270 }
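
Review note: the write verifier moves from the first request (hdr->req->wb_verf) into the shared pgio header, so every request completed under that header is checked against one recorded verifier; a mismatch sets NFS_IOHDR_NEED_RESCHED to force a resend. A sketch of that record-then-compare pattern (userspace, invented names):

#include <stdio.h>
#include <string.h>

struct verf { char data[8]; };

struct pgio_header {
        int         have_verf;
        struct verf verf;
        int         need_resched;
};

static void record_or_check(struct pgio_header *hdr, const struct verf *v)
{
        if (!hdr->have_verf) {
                memcpy(&hdr->verf, v, sizeof(hdr->verf));
                hdr->have_verf = 1;
        } else if (memcmp(&hdr->verf, v, sizeof(hdr->verf))) {
                hdr->need_resched = 1;  /* verifier changed: redo writes */
        }
}

int main(void)
{
        struct pgio_header hdr = { 0 };
        struct verf a = { "boot-1 " }, b = { "boot-2 " };

        record_or_check(&hdr, &a);
        record_or_check(&hdr, &b);      /* server "rebooted" in between */
        printf("resched: %d\n", hdr.need_resched);
        return 0;
}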
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8fdc9ec5c5d3..94effd5bc4a1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -900,7 +900,7 @@ static void free_session(struct kref *kref)
900 struct nfsd4_session *ses; 900 struct nfsd4_session *ses;
901 int mem; 901 int mem;
902 902
903 BUG_ON(!spin_is_locked(&client_lock)); 903 lockdep_assert_held(&client_lock);
904 ses = container_of(kref, struct nfsd4_session, se_ref); 904 ses = container_of(kref, struct nfsd4_session, se_ref);
905 nfsd4_del_conns(ses); 905 nfsd4_del_conns(ses);
906 spin_lock(&nfsd_drc_lock); 906 spin_lock(&nfsd_drc_lock);
@@ -1080,7 +1080,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1080static inline void 1080static inline void
1081free_client(struct nfs4_client *clp) 1081free_client(struct nfs4_client *clp)
1082{ 1082{
1083 BUG_ON(!spin_is_locked(&client_lock)); 1083 lockdep_assert_held(&client_lock);
1084 while (!list_empty(&clp->cl_sessions)) { 1084 while (!list_empty(&clp->cl_sessions)) {
1085 struct nfsd4_session *ses; 1085 struct nfsd4_session *ses;
1086 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, 1086 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
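
Review note: BUG_ON(!spin_is_locked()) is a poor assertion because spin_is_locked() only says "somebody holds it", and on uniprocessor !CONFIG_SMP builds it is identically false, so the BUG_ON fires spuriously; lockdep_assert_held() checks that the current context holds the lock and compiles away without lockdep. A pthread analogue with an explicit owner field (invented names; assert() plays the role of the debug-only check):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct dbg_lock {
        pthread_mutex_t m;
        pthread_t       owner;
        int             held;
};

static void dbg_lock(struct dbg_lock *l)
{
        pthread_mutex_lock(&l->m);
        l->owner = pthread_self();
        l->held = 1;
}

static void dbg_unlock(struct dbg_lock *l)
{
        l->held = 0;
        pthread_mutex_unlock(&l->m);
}

/* analogue of lockdep_assert_held(): the owner must be us */
#define assert_held(l) \
        assert((l)->held && pthread_equal((l)->owner, pthread_self()))

int main(void)
{
        struct dbg_lock l = { .m = PTHREAD_MUTEX_INITIALIZER };

        dbg_lock(&l);
        assert_held(&l);        /* fine: we hold it */
        dbg_unlock(&l);
        printf("ok\n");
        return 0;
}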
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 08a07a218d26..57ceaf33d177 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
191 while (!list_empty(head)) { 191 while (!list_empty(head)) {
192 ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); 192 ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
193 list_del_init(&ii->i_dirty); 193 list_del_init(&ii->i_dirty);
194 truncate_inode_pages(&ii->vfs_inode.i_data, 0);
195 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
194 iput(&ii->vfs_inode); 196 iput(&ii->vfs_inode);
195 } 197 }
196} 198}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 0e72ad6f22aa..88e11fb346b6 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2309 if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) 2309 if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
2310 continue; 2310 continue;
2311 list_del_init(&ii->i_dirty); 2311 list_del_init(&ii->i_dirty);
2312 truncate_inode_pages(&ii->vfs_inode.i_data, 0);
2313 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
2312 iput(&ii->vfs_inode); 2314 iput(&ii->vfs_inode);
2313 } 2315 }
2314} 2316}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 81a4cd22f80b..4f7795fb5fc0 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -456,7 +456,7 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
456 stats->ls_gets++; 456 stats->ls_gets++;
457 stats->ls_total += ktime_to_ns(kt); 457 stats->ls_total += ktime_to_ns(kt);
458 /* overflow */ 458 /* overflow */
459 if (unlikely(stats->ls_gets) == 0) { 459 if (unlikely(stats->ls_gets == 0)) {
460 stats->ls_gets++; 460 stats->ls_gets++;
461 stats->ls_total = ktime_to_ns(kt); 461 stats->ls_total = ktime_to_ns(kt);
462 } 462 }
@@ -3932,6 +3932,8 @@ unqueue:
3932static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, 3932static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3933 struct ocfs2_lock_res *lockres) 3933 struct ocfs2_lock_res *lockres)
3934{ 3934{
3935 unsigned long flags;
3936
3935 assert_spin_locked(&lockres->l_lock); 3937 assert_spin_locked(&lockres->l_lock);
3936 3938
3937 if (lockres->l_flags & OCFS2_LOCK_FREEING) { 3939 if (lockres->l_flags & OCFS2_LOCK_FREEING) {
@@ -3945,21 +3947,22 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3945 3947
3946 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); 3948 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
3947 3949
3948 spin_lock(&osb->dc_task_lock); 3950 spin_lock_irqsave(&osb->dc_task_lock, flags);
3949 if (list_empty(&lockres->l_blocked_list)) { 3951 if (list_empty(&lockres->l_blocked_list)) {
3950 list_add_tail(&lockres->l_blocked_list, 3952 list_add_tail(&lockres->l_blocked_list,
3951 &osb->blocked_lock_list); 3953 &osb->blocked_lock_list);
3952 osb->blocked_lock_count++; 3954 osb->blocked_lock_count++;
3953 } 3955 }
3954 spin_unlock(&osb->dc_task_lock); 3956 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3955} 3957}
3956 3958
3957static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) 3959static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
3958{ 3960{
3959 unsigned long processed; 3961 unsigned long processed;
3962 unsigned long flags;
3960 struct ocfs2_lock_res *lockres; 3963 struct ocfs2_lock_res *lockres;
3961 3964
3962 spin_lock(&osb->dc_task_lock); 3965 spin_lock_irqsave(&osb->dc_task_lock, flags);
3963 /* grab this early so we know to try again if a state change and 3966 /* grab this early so we know to try again if a state change and
3964 * wake happens part-way through our work */ 3967 * wake happens part-way through our work */
3965 osb->dc_work_sequence = osb->dc_wake_sequence; 3968 osb->dc_work_sequence = osb->dc_wake_sequence;
@@ -3972,38 +3975,40 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
3972 struct ocfs2_lock_res, l_blocked_list); 3975 struct ocfs2_lock_res, l_blocked_list);
3973 list_del_init(&lockres->l_blocked_list); 3976 list_del_init(&lockres->l_blocked_list);
3974 osb->blocked_lock_count--; 3977 osb->blocked_lock_count--;
3975 spin_unlock(&osb->dc_task_lock); 3978 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3976 3979
3977 BUG_ON(!processed); 3980 BUG_ON(!processed);
3978 processed--; 3981 processed--;
3979 3982
3980 ocfs2_process_blocked_lock(osb, lockres); 3983 ocfs2_process_blocked_lock(osb, lockres);
3981 3984
3982 spin_lock(&osb->dc_task_lock); 3985 spin_lock_irqsave(&osb->dc_task_lock, flags);
3983 } 3986 }
3984 spin_unlock(&osb->dc_task_lock); 3987 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3985} 3988}
3986 3989
3987static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) 3990static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
3988{ 3991{
3989 int empty = 0; 3992 int empty = 0;
3993 unsigned long flags;
3990 3994
3991 spin_lock(&osb->dc_task_lock); 3995 spin_lock_irqsave(&osb->dc_task_lock, flags);
3992 if (list_empty(&osb->blocked_lock_list)) 3996 if (list_empty(&osb->blocked_lock_list))
3993 empty = 1; 3997 empty = 1;
3994 3998
3995 spin_unlock(&osb->dc_task_lock); 3999 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
3996 return empty; 4000 return empty;
3997} 4001}
3998 4002
3999static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) 4003static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
4000{ 4004{
4001 int should_wake = 0; 4005 int should_wake = 0;
4006 unsigned long flags;
4002 4007
4003 spin_lock(&osb->dc_task_lock); 4008 spin_lock_irqsave(&osb->dc_task_lock, flags);
4004 if (osb->dc_work_sequence != osb->dc_wake_sequence) 4009 if (osb->dc_work_sequence != osb->dc_wake_sequence)
4005 should_wake = 1; 4010 should_wake = 1;
4006 spin_unlock(&osb->dc_task_lock); 4011 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4007 4012
4008 return should_wake; 4013 return should_wake;
4009} 4014}
@@ -4033,10 +4038,12 @@ static int ocfs2_downconvert_thread(void *arg)
4033 4038
4034void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) 4039void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
4035{ 4040{
4036 spin_lock(&osb->dc_task_lock); 4041 unsigned long flags;
4042
4043 spin_lock_irqsave(&osb->dc_task_lock, flags);
4037 /* make sure the voting thread gets a swipe at whatever changes 4044 /* make sure the voting thread gets a swipe at whatever changes
4038 * the caller may have made to the voting state */ 4045 * the caller may have made to the voting state */
4039 osb->dc_wake_sequence++; 4046 osb->dc_wake_sequence++;
4040 spin_unlock(&osb->dc_task_lock); 4047 spin_unlock_irqrestore(&osb->dc_task_lock, flags);
4041 wake_up(&osb->dc_event); 4048 wake_up(&osb->dc_event);
4042} 4049}
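
Review note: every dc_task_lock acquisition switches to spin_lock_irqsave()/spin_lock_irqrestore(), which saves the caller's interrupt state and restores exactly that state on unlock, so the lock becomes safe to take from contexts that already run with interrupts disabled. A userspace analogue using signal masks in place of hardware interrupts (invented names):

#include <signal.h>
#include <stdio.h>

static void lock_irqsave(sigset_t *flags)
{
        sigset_t block;

        sigfillset(&block);
        /* save the current mask in *flags, then block everything */
        sigprocmask(SIG_BLOCK, &block, flags);
        /* ... take the spinlock here ... */
}

static void unlock_irqrestore(const sigset_t *flags)
{
        /* ... drop the spinlock here ... */
        sigprocmask(SIG_SETMASK, flags, NULL);  /* restore saved mask */
}

int main(void)
{
        sigset_t flags;

        lock_irqsave(&flags);
        puts("in critical section, signals blocked");
        unlock_irqrestore(&flags);
        return 0;
}

Restoring the saved state, rather than unconditionally re-enabling, is what makes the save/restore pair safe to nest.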
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2f5b92ef0e53..70b5863a2d64 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -923,8 +923,6 @@ out_unlock:
923 923
924 ocfs2_inode_unlock(inode, 0); 924 ocfs2_inode_unlock(inode, 0);
925out: 925out:
926 if (ret && ret != -ENXIO)
927 ret = -ENXIO;
928 return ret; 926 return ret;
929} 927}
930 928
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 061591a3ab08..7602783d7f41 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1950,7 +1950,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
1950 if (ret < 0) 1950 if (ret < 0)
1951 mlog_errno(ret); 1951 mlog_errno(ret);
1952 1952
1953 if (file->f_flags & O_SYNC) 1953 if (file && (file->f_flags & O_SYNC))
1954 handle->h_sync = 1; 1954 handle->h_sync = 1;
1955 1955
1956 ocfs2_commit_trans(osb, handle); 1956 ocfs2_commit_trans(osb, handle);
@@ -2422,8 +2422,10 @@ out_dio:
2422 unaligned_dio = 0; 2422 unaligned_dio = 0;
2423 } 2423 }
2424 2424
2425 if (unaligned_dio) 2425 if (unaligned_dio) {
2426 ocfs2_iocb_clear_unaligned_aio(iocb);
2426 atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); 2427 atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio);
2428 }
2427 2429
2428out: 2430out:
2429 if (rw_level != -1) 2431 if (rw_level != -1)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 92fcd575775a..0a86e302655f 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -399,8 +399,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
399 msecs_to_jiffies(oinfo->dqi_syncms)); 399 msecs_to_jiffies(oinfo->dqi_syncms));
400 400
401out_err: 401out_err:
402 if (status)
403 mlog_errno(status);
404 return status; 402 return status;
405out_unlock: 403out_unlock:
406 ocfs2_unlock_global_qf(oinfo, 0); 404 ocfs2_unlock_global_qf(oinfo, 0);
diff --git a/fs/open.c b/fs/open.c
index d6c79a0dffc7..1540632d8387 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -397,10 +397,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
397{ 397{
398 struct file *file; 398 struct file *file;
399 struct inode *inode; 399 struct inode *inode;
400 int error; 400 int error, fput_needed;
401 401
402 error = -EBADF; 402 error = -EBADF;
403 file = fget(fd); 403 file = fget_raw_light(fd, &fput_needed);
404 if (!file) 404 if (!file)
405 goto out; 405 goto out;
406 406
@@ -414,7 +414,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
414 if (!error) 414 if (!error)
415 set_fs_pwd(current->fs, &file->f_path); 415 set_fs_pwd(current->fs, &file->f_path);
416out_putf: 416out_putf:
417 fput(file); 417 fput_light(file, fput_needed);
418out: 418out:
419 return error; 419 return error;
420} 420}
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index aeb19e68e086..11a2aa2a56c4 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -258,7 +258,7 @@ fail:
258 return rc; 258 return rc;
259} 259}
260 260
261int pstore_fill_super(struct super_block *sb, void *data, int silent) 261static int pstore_fill_super(struct super_block *sb, void *data, int silent)
262{ 262{
263 struct inode *inode; 263 struct inode *inode;
264 264
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 82c585f715e3..03ce7a9b81cc 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -94,20 +94,15 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
94 * as we can from the end of the buffer. 94 * as we can from the end of the buffer.
95 */ 95 */
96static void pstore_dump(struct kmsg_dumper *dumper, 96static void pstore_dump(struct kmsg_dumper *dumper,
97 enum kmsg_dump_reason reason, 97 enum kmsg_dump_reason reason)
98 const char *s1, unsigned long l1,
99 const char *s2, unsigned long l2)
100{ 98{
101 unsigned long s1_start, s2_start; 99 unsigned long total = 0;
102 unsigned long l1_cpy, l2_cpy;
103 unsigned long size, total = 0;
104 char *dst;
105 const char *why; 100 const char *why;
106 u64 id; 101 u64 id;
107 int hsize, ret;
108 unsigned int part = 1; 102 unsigned int part = 1;
109 unsigned long flags = 0; 103 unsigned long flags = 0;
110 int is_locked = 0; 104 int is_locked = 0;
105 int ret;
111 106
112 why = get_reason_str(reason); 107 why = get_reason_str(reason);
113 108
@@ -119,30 +114,25 @@ static void pstore_dump(struct kmsg_dumper *dumper,
119 spin_lock_irqsave(&psinfo->buf_lock, flags); 114 spin_lock_irqsave(&psinfo->buf_lock, flags);
120 oopscount++; 115 oopscount++;
121 while (total < kmsg_bytes) { 116 while (total < kmsg_bytes) {
117 char *dst;
118 unsigned long size;
119 int hsize;
120 size_t len;
121
122 dst = psinfo->buf; 122 dst = psinfo->buf;
123 hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part); 123 hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);
124 size = psinfo->bufsize - hsize; 124 size = psinfo->bufsize - hsize;
125 dst += hsize; 125 dst += hsize;
126 126
127 l2_cpy = min(l2, size); 127 if (!kmsg_dump_get_buffer(dumper, true, dst, size, &len))
128 l1_cpy = min(l1, size - l2_cpy);
129
130 if (l1_cpy + l2_cpy == 0)
131 break; 128 break;
132 129
133 s2_start = l2 - l2_cpy;
134 s1_start = l1 - l1_cpy;
135
136 memcpy(dst, s1 + s1_start, l1_cpy);
137 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
138
139 ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, 130 ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
140 hsize + l1_cpy + l2_cpy, psinfo); 131 hsize + len, psinfo);
141 if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) 132 if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
142 pstore_new_entry = 1; 133 pstore_new_entry = 1;
143 l1 -= l1_cpy; 134
144 l2 -= l2_cpy; 135 total += hsize + len;
145 total += l1_cpy + l2_cpy;
146 part++; 136 part++;
147 } 137 }
148 if (in_nmi()) { 138 if (in_nmi()) {
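
Review note: pstore_dump() drops the hand-rolled splice of the two kmsg ring segments (s1/l1, s2/l2) in favor of kmsg_dump_get_buffer(), which fills whatever space remains after the per-part header until the source is drained. A compact userspace sketch of that chunked-record loop (invented names; a string stands in for the kmsg ring):

#include <stdio.h>
#include <string.h>

static const char *src = "panic log line 1\npanic log line 2\n";
static size_t src_off;

/* stand-in for kmsg_dump_get_buffer(): copy up to size bytes */
static int get_buffer(char *dst, size_t size, size_t *len)
{
        size_t avail = strlen(src) - src_off;

        if (!avail)
                return 0;
        *len = avail < size ? avail : size;
        memcpy(dst, src + src_off, *len);
        src_off += *len;
        return 1;
}

int main(void)
{
        char record[32];
        size_t len;
        int part = 1, hsize;

        while (1) {
                hsize = sprintf(record, "Oops#1 Part%d\n", part++);
                if (!get_buffer(record + hsize, sizeof(record) - hsize, &len))
                        break;
                fwrite(record, 1, hsize + len, stdout);
        }
        return 0;
}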
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 9123cce28c1e..453030f9c5bc 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -106,6 +106,8 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
106 time->tv_sec = 0; 106 time->tv_sec = 0;
107 time->tv_nsec = 0; 107 time->tv_nsec = 0;
108 108
109 /* Update old/shadowed buffer. */
110 persistent_ram_save_old(prz);
109 size = persistent_ram_old_size(prz); 111 size = persistent_ram_old_size(prz);
110 *buf = kmalloc(size, GFP_KERNEL); 112 *buf = kmalloc(size, GFP_KERNEL);
111 if (*buf == NULL) 113 if (*buf == NULL)
@@ -184,6 +186,7 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id,
184 return -EINVAL; 186 return -EINVAL;
185 187
186 persistent_ram_free_old(cxt->przs[id]); 188 persistent_ram_free_old(cxt->przs[id]);
189 persistent_ram_zap(cxt->przs[id]);
187 190
188 return 0; 191 return 0;
189} 192}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 31f8d184f3a0..c5fbdbbf81ac 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -250,23 +250,24 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz,
250 persistent_ram_update_ecc(prz, start, count); 250 persistent_ram_update_ecc(prz, start, count);
251} 251}
252 252
253static void __init 253void persistent_ram_save_old(struct persistent_ram_zone *prz)
254persistent_ram_save_old(struct persistent_ram_zone *prz)
255{ 254{
256 struct persistent_ram_buffer *buffer = prz->buffer; 255 struct persistent_ram_buffer *buffer = prz->buffer;
257 size_t size = buffer_size(prz); 256 size_t size = buffer_size(prz);
258 size_t start = buffer_start(prz); 257 size_t start = buffer_start(prz);
259 char *dest;
260 258
261 persistent_ram_ecc_old(prz); 259 if (!size)
260 return;
262 261
263 dest = kmalloc(size, GFP_KERNEL); 262 if (!prz->old_log) {
264 if (dest == NULL) { 263 persistent_ram_ecc_old(prz);
264 prz->old_log = kmalloc(size, GFP_KERNEL);
265 }
266 if (!prz->old_log) {
265 pr_err("persistent_ram: failed to allocate buffer\n"); 267 pr_err("persistent_ram: failed to allocate buffer\n");
266 return; 268 return;
267 } 269 }
268 270
269 prz->old_log = dest;
270 prz->old_log_size = size; 271 prz->old_log_size = size;
271 memcpy(prz->old_log, &buffer->data[start], size - start); 272 memcpy(prz->old_log, &buffer->data[start], size - start);
272 memcpy(prz->old_log + size - start, &buffer->data[0], start); 273 memcpy(prz->old_log + size - start, &buffer->data[0], start);
@@ -319,6 +320,13 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz)
319 prz->old_log_size = 0; 320 prz->old_log_size = 0;
320} 321}
321 322
323void persistent_ram_zap(struct persistent_ram_zone *prz)
324{
325 atomic_set(&prz->buffer->start, 0);
326 atomic_set(&prz->buffer->size, 0);
327 persistent_ram_update_header_ecc(prz);
328}
329
322static void *persistent_ram_vmap(phys_addr_t start, size_t size) 330static void *persistent_ram_vmap(phys_addr_t start, size_t size)
323{ 331{
324 struct page **pages; 332 struct page **pages;
@@ -405,6 +413,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool
405 " size %zu, start %zu\n", 413 " size %zu, start %zu\n",
406 buffer_size(prz), buffer_start(prz)); 414 buffer_size(prz), buffer_start(prz));
407 persistent_ram_save_old(prz); 415 persistent_ram_save_old(prz);
416 return 0;
408 } 417 }
409 } else { 418 } else {
410 pr_info("persistent_ram: no valid data in buffer" 419 pr_info("persistent_ram: no valid data in buffer"
@@ -412,8 +421,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool
412 } 421 }
413 422
414 prz->buffer->sig = PERSISTENT_RAM_SIG; 423 prz->buffer->sig = PERSISTENT_RAM_SIG;
415 atomic_set(&prz->buffer->start, 0); 424 persistent_ram_zap(prz);
416 atomic_set(&prz->buffer->size, 0);
417 425
418 return 0; 426 return 0;
419} 427}
@@ -448,7 +456,6 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start,
448 goto err; 456 goto err;
449 457
450 persistent_ram_post_init(prz, ecc); 458 persistent_ram_post_init(prz, ecc);
451 persistent_ram_update_header_ecc(prz);
452 459
453 return prz; 460 return prz;
454err: 461err:
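
Review note: persistent_ram_zap() factors the ring reset out of post_init so the erase path can reuse it, and it recomputes the header ECC itself (hence the deleted call in persistent_ram_new()); save_old() is also made re-callable, reusing an existing old_log allocation. A sketch of the zap itself (userspace C11 atomics; the ECC step is elided):

#include <stdatomic.h>
#include <stdio.h>

struct pr_buffer {
        atomic_size_t start;    /* oldest byte */
        atomic_size_t size;     /* bytes currently stored */
        char data[128];
};

static void zap(struct pr_buffer *b)
{
        atomic_store(&b->start, 0);
        atomic_store(&b->size, 0);
        /* the kernel version also recomputes the header ECC here */
}

int main(void)
{
        struct pr_buffer b;

        atomic_store(&b.start, 17);
        atomic_store(&b.size, 99);
        zap(&b);
        printf("start=%zu size=%zu\n", atomic_load(&b.start),
               atomic_load(&b.size));
        return 0;
}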
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index fbb0b478a346..d5378d028589 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
110 110
111 /* prevent the page from being discarded on memory pressure */ 111 /* prevent the page from being discarded on memory pressure */
112 SetPageDirty(page); 112 SetPageDirty(page);
113 SetPageUptodate(page);
113 114
114 unlock_page(page); 115 unlock_page(page);
115 put_page(page); 116 put_page(page);
diff --git a/fs/splice.c b/fs/splice.c
index c9f1318a3b82..7bf08fa22ec9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -273,13 +273,16 @@ void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
273 * Check if we need to grow the arrays holding pages and partial page 273 * Check if we need to grow the arrays holding pages and partial page
274 * descriptions. 274 * descriptions.
275 */ 275 */
276int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) 276int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
277{ 277{
278 if (pipe->buffers <= PIPE_DEF_BUFFERS) 278 unsigned int buffers = ACCESS_ONCE(pipe->buffers);
279
280 spd->nr_pages_max = buffers;
281 if (buffers <= PIPE_DEF_BUFFERS)
279 return 0; 282 return 0;
280 283
281 spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); 284 spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL);
282 spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); 285 spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL);
283 286
284 if (spd->pages && spd->partial) 287 if (spd->pages && spd->partial)
285 return 0; 288 return 0;
@@ -289,10 +292,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
289 return -ENOMEM; 292 return -ENOMEM;
290} 293}
291 294
292void splice_shrink_spd(struct pipe_inode_info *pipe, 295void splice_shrink_spd(struct splice_pipe_desc *spd)
293 struct splice_pipe_desc *spd)
294{ 296{
295 if (pipe->buffers <= PIPE_DEF_BUFFERS) 297 if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
296 return; 298 return;
297 299
298 kfree(spd->pages); 300 kfree(spd->pages);
@@ -315,6 +317,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
315 struct splice_pipe_desc spd = { 317 struct splice_pipe_desc spd = {
316 .pages = pages, 318 .pages = pages,
317 .partial = partial, 319 .partial = partial,
320 .nr_pages_max = PIPE_DEF_BUFFERS,
318 .flags = flags, 321 .flags = flags,
319 .ops = &page_cache_pipe_buf_ops, 322 .ops = &page_cache_pipe_buf_ops,
320 .spd_release = spd_release_page, 323 .spd_release = spd_release_page,
@@ -326,7 +329,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
326 index = *ppos >> PAGE_CACHE_SHIFT; 329 index = *ppos >> PAGE_CACHE_SHIFT;
327 loff = *ppos & ~PAGE_CACHE_MASK; 330 loff = *ppos & ~PAGE_CACHE_MASK;
328 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 331 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
329 nr_pages = min(req_pages, pipe->buffers); 332 nr_pages = min(req_pages, spd.nr_pages_max);
330 333
331 /* 334 /*
332 * Lookup the (hopefully) full range of pages we need. 335 * Lookup the (hopefully) full range of pages we need.
@@ -497,7 +500,7 @@ fill_it:
497 if (spd.nr_pages) 500 if (spd.nr_pages)
498 error = splice_to_pipe(pipe, &spd); 501 error = splice_to_pipe(pipe, &spd);
499 502
500 splice_shrink_spd(pipe, &spd); 503 splice_shrink_spd(&spd);
501 return error; 504 return error;
502} 505}
503 506
@@ -598,6 +601,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
598 struct splice_pipe_desc spd = { 601 struct splice_pipe_desc spd = {
599 .pages = pages, 602 .pages = pages,
600 .partial = partial, 603 .partial = partial,
604 .nr_pages_max = PIPE_DEF_BUFFERS,
601 .flags = flags, 605 .flags = flags,
602 .ops = &default_pipe_buf_ops, 606 .ops = &default_pipe_buf_ops,
603 .spd_release = spd_release_page, 607 .spd_release = spd_release_page,
@@ -608,8 +612,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
608 612
609 res = -ENOMEM; 613 res = -ENOMEM;
610 vec = __vec; 614 vec = __vec;
611 if (pipe->buffers > PIPE_DEF_BUFFERS) { 615 if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
612 vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); 616 vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
613 if (!vec) 617 if (!vec)
614 goto shrink_ret; 618 goto shrink_ret;
615 } 619 }
@@ -617,7 +621,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
617 offset = *ppos & ~PAGE_CACHE_MASK; 621 offset = *ppos & ~PAGE_CACHE_MASK;
618 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 622 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
619 623
620 for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { 624 for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
621 struct page *page; 625 struct page *page;
622 626
623 page = alloc_page(GFP_USER); 627 page = alloc_page(GFP_USER);
@@ -665,7 +669,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
665shrink_ret: 669shrink_ret:
666 if (vec != __vec) 670 if (vec != __vec)
667 kfree(vec); 671 kfree(vec);
668 splice_shrink_spd(pipe, &spd); 672 splice_shrink_spd(&spd);
669 return res; 673 return res;
670 674
671err: 675err:
@@ -1614,6 +1618,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1614 struct splice_pipe_desc spd = { 1618 struct splice_pipe_desc spd = {
1615 .pages = pages, 1619 .pages = pages,
1616 .partial = partial, 1620 .partial = partial,
1621 .nr_pages_max = PIPE_DEF_BUFFERS,
1617 .flags = flags, 1622 .flags = flags,
1618 .ops = &user_page_pipe_buf_ops, 1623 .ops = &user_page_pipe_buf_ops,
1619 .spd_release = spd_release_page, 1624 .spd_release = spd_release_page,
@@ -1629,13 +1634,13 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1629 1634
1630 spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, 1635 spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
1631 spd.partial, false, 1636 spd.partial, false,
1632 pipe->buffers); 1637 spd.nr_pages_max);
1633 if (spd.nr_pages <= 0) 1638 if (spd.nr_pages <= 0)
1634 ret = spd.nr_pages; 1639 ret = spd.nr_pages;
1635 else 1640 else
1636 ret = splice_to_pipe(pipe, &spd); 1641 ret = splice_to_pipe(pipe, &spd);
1637 1642
1638 splice_shrink_spd(pipe, &spd); 1643 splice_shrink_spd(&spd);
1639 return ret; 1644 return ret;
1640} 1645}
1641 1646
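
Review note: pipe->buffers can change under F_SETPIPE_SZ between splice_grow_spd() and splice_shrink_spd(), so the value is snapshotted once into spd->nr_pages_max and every later decision (allocation, loop bounds, freeing) uses the snapshot; shrink no longer needs the pipe at all. A sketch of the read-once sizing pattern (invented names):

#include <stdio.h>
#include <stdlib.h>

#define DEF_BUFFERS 16

struct spd {
        void   **pages;
        unsigned nr_pages_max;  /* snapshot, not the live value */
};

static volatile unsigned live_pipe_buffers = 64;  /* may change any time */

static int grow(struct spd *spd)
{
        spd->nr_pages_max = live_pipe_buffers;    /* read exactly once */
        if (spd->nr_pages_max <= DEF_BUFFERS)
                return 0;                         /* stack array suffices */
        spd->pages = malloc(spd->nr_pages_max * sizeof(void *));
        return spd->pages ? 0 : -1;
}

static void shrink(struct spd *spd)
{
        if (spd->nr_pages_max <= DEF_BUFFERS)     /* same test as grow() */
                return;
        free(spd->pages);
}

int main(void)
{
        struct spd spd = { 0 };

        if (grow(&spd) == 0) {
                live_pipe_buffers = 8;  /* racing resize: harmless now */
                shrink(&spd);
        }
        printf("snapshot was %u\n", spd.nr_pages_max);
        return 0;
}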
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 84a7e6f3c046..92df3b081539 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2918,7 +2918,7 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2918 struct dentry *dent; 2918 struct dentry *dent;
2919 struct ubifs_debug_info *d = c->dbg; 2919 struct ubifs_debug_info *d = c->dbg;
2920 2920
2921 if (!IS_ENABLED(DEBUG_FS)) 2921 if (!IS_ENABLED(CONFIG_DEBUG_FS))
2922 return 0; 2922 return 0;
2923 2923
2924 n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, 2924 n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
@@ -3013,7 +3013,7 @@ out:
3013 */ 3013 */
3014void dbg_debugfs_exit_fs(struct ubifs_info *c) 3014void dbg_debugfs_exit_fs(struct ubifs_info *c)
3015{ 3015{
3016 if (IS_ENABLED(DEBUG_FS)) 3016 if (IS_ENABLED(CONFIG_DEBUG_FS))
3017 debugfs_remove_recursive(c->dbg->dfs_dir); 3017 debugfs_remove_recursive(c->dbg->dfs_dir);
3018} 3018}
3019 3019
@@ -3099,7 +3099,7 @@ int dbg_debugfs_init(void)
3099 const char *fname; 3099 const char *fname;
3100 struct dentry *dent; 3100 struct dentry *dent;
3101 3101
3102 if (!IS_ENABLED(DEBUG_FS)) 3102 if (!IS_ENABLED(CONFIG_DEBUG_FS))
3103 return 0; 3103 return 0;
3104 3104
3105 fname = "ubifs"; 3105 fname = "ubifs";
@@ -3166,7 +3166,7 @@ out:
3166 */ 3166 */
3167void dbg_debugfs_exit(void) 3167void dbg_debugfs_exit(void)
3168{ 3168{
3169 if (IS_ENABLED(DEBUG_FS)) 3169 if (IS_ENABLED(CONFIG_DEBUG_FS))
3170 debugfs_remove_recursive(dfs_rootdir); 3170 debugfs_remove_recursive(dfs_rootdir);
3171} 3171}
3172 3172
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 2559d174e004..28ec13af28d9 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -939,8 +939,8 @@ static int find_dirtiest_idx_leb(struct ubifs_info *c)
939 } 939 }
940 dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, 940 dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,
941 lp->free, lp->flags); 941 lp->free, lp->flags);
942 ubifs_assert(lp->flags | LPROPS_TAKEN); 942 ubifs_assert(lp->flags & LPROPS_TAKEN);
943 ubifs_assert(lp->flags | LPROPS_INDEX); 943 ubifs_assert(lp->flags & LPROPS_INDEX);
944 return lnum; 944 return lnum;
945} 945}
946 946
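
Review note: the find.c hunk fixes assertions that could never fire: (flags | BIT) is nonzero for any nonzero BIT, so the old ubifs_assert() was vacuously true; testing flag membership needs '&'. A quick demonstration (illustrative bit values, not the real LPROPS constants):

#include <stdio.h>

#define LPROPS_TAKEN 0x04   /* illustrative values */
#define LPROPS_INDEX 0x08

int main(void)
{
        int flags = 0;      /* neither flag set */

        printf("flags | TAKEN = %d (always true)\n",
               (flags | LPROPS_TAKEN) != 0);
        printf("flags & TAKEN = %d (real test)\n",
               (flags & LPROPS_TAKEN) != 0);
        return 0;
}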
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index ef3d1ba6d992..15e2fc5aa60b 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c)
718 lnum = ubifs_next_log_lnum(c, lnum); 718 lnum = ubifs_next_log_lnum(c, lnum);
719 } 719 }
720 720
721 /* Fixup the current log head */ 721 /*
722 err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); 722 * Fixup the log head, which contains only a CS node at the
723 * beginning.
724 */
725 err = fixup_leb(c, c->lhead_lnum,
726 ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
723 if (err) 727 if (err)
724 goto out; 728 goto out;
725 729
diff --git a/fs/udf/super.c b/fs/udf/super.c
index ac8a348dcb69..8d86a8706c0e 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -56,6 +56,7 @@
56#include <linux/seq_file.h> 56#include <linux/seq_file.h>
57#include <linux/bitmap.h> 57#include <linux/bitmap.h>
58#include <linux/crc-itu-t.h> 58#include <linux/crc-itu-t.h>
59#include <linux/log2.h>
59#include <asm/byteorder.h> 60#include <asm/byteorder.h>
60 61
61#include "udf_sb.h" 62#include "udf_sb.h"
@@ -1215,16 +1216,65 @@ out_bh:
1215 return ret; 1216 return ret;
1216} 1217}
1217 1218
1219static int udf_load_sparable_map(struct super_block *sb,
1220 struct udf_part_map *map,
1221 struct sparablePartitionMap *spm)
1222{
1223 uint32_t loc;
1224 uint16_t ident;
1225 struct sparingTable *st;
1226 struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing;
1227 int i;
1228 struct buffer_head *bh;
1229
1230 map->s_partition_type = UDF_SPARABLE_MAP15;
1231 sdata->s_packet_len = le16_to_cpu(spm->packetLength);
1232 if (!is_power_of_2(sdata->s_packet_len)) {
1233 udf_err(sb, "error loading logical volume descriptor: "
1234 "Invalid packet length %u\n",
1235 (unsigned)sdata->s_packet_len);
1236 return -EIO;
1237 }
1238 if (spm->numSparingTables > 4) {
1239 udf_err(sb, "error loading logical volume descriptor: "
1240 "Too many sparing tables (%d)\n",
1241 (int)spm->numSparingTables);
1242 return -EIO;
1243 }
1244
1245 for (i = 0; i < spm->numSparingTables; i++) {
1246 loc = le32_to_cpu(spm->locSparingTable[i]);
1247 bh = udf_read_tagged(sb, loc, loc, &ident);
1248 if (!bh)
1249 continue;
1250
1251 st = (struct sparingTable *)bh->b_data;
1252 if (ident != 0 ||
1253 strncmp(st->sparingIdent.ident, UDF_ID_SPARING,
1254 strlen(UDF_ID_SPARING)) ||
1255 sizeof(*st) + le16_to_cpu(st->reallocationTableLen) >
1256 sb->s_blocksize) {
1257 brelse(bh);
1258 continue;
1259 }
1260
1261 sdata->s_spar_map[i] = bh;
1262 }
1263 map->s_partition_func = udf_get_pblock_spar15;
1264 return 0;
1265}
1266
1218static int udf_load_logicalvol(struct super_block *sb, sector_t block, 1267static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1219 struct kernel_lb_addr *fileset) 1268 struct kernel_lb_addr *fileset)
1220{ 1269{
1221 struct logicalVolDesc *lvd; 1270 struct logicalVolDesc *lvd;
1222 int i, j, offset; 1271 int i, offset;
1223 uint8_t type; 1272 uint8_t type;
1224 struct udf_sb_info *sbi = UDF_SB(sb); 1273 struct udf_sb_info *sbi = UDF_SB(sb);
1225 struct genericPartitionMap *gpm; 1274 struct genericPartitionMap *gpm;
1226 uint16_t ident; 1275 uint16_t ident;
1227 struct buffer_head *bh; 1276 struct buffer_head *bh;
1277 unsigned int table_len;
1228 int ret = 0; 1278 int ret = 0;
1229 1279
1230 bh = udf_read_tagged(sb, block, block, &ident); 1280 bh = udf_read_tagged(sb, block, block, &ident);
@@ -1232,15 +1282,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1232 return 1; 1282 return 1;
1233 BUG_ON(ident != TAG_IDENT_LVD); 1283 BUG_ON(ident != TAG_IDENT_LVD);
1234 lvd = (struct logicalVolDesc *)bh->b_data; 1284 lvd = (struct logicalVolDesc *)bh->b_data;
1235 1285 table_len = le32_to_cpu(lvd->mapTableLength);
1236 i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); 1286 if (sizeof(*lvd) + table_len > sb->s_blocksize) {
1237 if (i != 0) { 1287 udf_err(sb, "error loading logical volume descriptor: "
1238 ret = i; 1288 "Partition table too long (%u > %lu)\n", table_len,
1289 sb->s_blocksize - sizeof(*lvd));
1239 goto out_bh; 1290 goto out_bh;
1240 } 1291 }
1241 1292
1293 ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
1294 if (ret)
1295 goto out_bh;
1296
1242 for (i = 0, offset = 0; 1297 for (i = 0, offset = 0;
1243 i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); 1298 i < sbi->s_partitions && offset < table_len;
1244 i++, offset += gpm->partitionMapLength) { 1299 i++, offset += gpm->partitionMapLength) {
1245 struct udf_part_map *map = &sbi->s_partmaps[i]; 1300 struct udf_part_map *map = &sbi->s_partmaps[i];
1246 gpm = (struct genericPartitionMap *) 1301 gpm = (struct genericPartitionMap *)
@@ -1275,38 +1330,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1275 } else if (!strncmp(upm2->partIdent.ident, 1330 } else if (!strncmp(upm2->partIdent.ident,
1276 UDF_ID_SPARABLE, 1331 UDF_ID_SPARABLE,
1277 strlen(UDF_ID_SPARABLE))) { 1332 strlen(UDF_ID_SPARABLE))) {
1278 uint32_t loc; 1333 if (udf_load_sparable_map(sb, map,
1279 struct sparingTable *st; 1334 (struct sparablePartitionMap *)gpm) < 0)
1280 struct sparablePartitionMap *spm = 1335 goto out_bh;
1281 (struct sparablePartitionMap *)gpm;
1282
1283 map->s_partition_type = UDF_SPARABLE_MAP15;
1284 map->s_type_specific.s_sparing.s_packet_len =
1285 le16_to_cpu(spm->packetLength);
1286 for (j = 0; j < spm->numSparingTables; j++) {
1287 struct buffer_head *bh2;
1288
1289 loc = le32_to_cpu(
1290 spm->locSparingTable[j]);
1291 bh2 = udf_read_tagged(sb, loc, loc,
1292 &ident);
1293 map->s_type_specific.s_sparing.
1294 s_spar_map[j] = bh2;
1295
1296 if (bh2 == NULL)
1297 continue;
1298
1299 st = (struct sparingTable *)bh2->b_data;
1300 if (ident != 0 || strncmp(
1301 st->sparingIdent.ident,
1302 UDF_ID_SPARING,
1303 strlen(UDF_ID_SPARING))) {
1304 brelse(bh2);
1305 map->s_type_specific.s_sparing.
1306 s_spar_map[j] = NULL;
1307 }
1308 }
1309 map->s_partition_func = udf_get_pblock_spar15;
1310 } else if (!strncmp(upm2->partIdent.ident, 1336 } else if (!strncmp(upm2->partIdent.ident,
1311 UDF_ID_METADATA, 1337 UDF_ID_METADATA,
1312 strlen(UDF_ID_METADATA))) { 1338 strlen(UDF_ID_METADATA))) {
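
Review note: the extracted udf_load_sparable_map() validates on-disk fields before trusting them: mapTableLength is bounded against the block size, numSparingTables is capped, and the packet length must be a power of two (hence the new linux/log2.h include). The power-of-two test is the classic n & (n - 1) trick; a minimal version:

#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_2(unsigned long n)
{
        /* a power of two has exactly one bit set, so n - 1 clears it */
        return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
        unsigned long v[] = { 0, 1, 2, 3, 32, 33 };

        for (int i = 0; i < 6; i++)
                printf("%lu -> %d\n", v[i], is_power_of_2(v[i]));
        return 0;
}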
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 229641fb8e67..4f33c32affe3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,12 +1074,13 @@ restart:
1074 * If we couldn't get anything, give up. 1074 * If we couldn't get anything, give up.
1075 */ 1075 */
1076 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1076 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1077 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1078
1077 if (!forced++) { 1079 if (!forced++) {
1078 trace_xfs_alloc_near_busy(args); 1080 trace_xfs_alloc_near_busy(args);
1079 xfs_log_force(args->mp, XFS_LOG_SYNC); 1081 xfs_log_force(args->mp, XFS_LOG_SYNC);
1080 goto restart; 1082 goto restart;
1081 } 1083 }
1082
1083 trace_xfs_alloc_size_neither(args); 1084 trace_xfs_alloc_size_neither(args);
1084 args->agbno = NULLAGBLOCK; 1085 args->agbno = NULLAGBLOCK;
1085 return 0; 1086 return 0;
@@ -2433,15 +2434,24 @@ xfs_alloc_vextent_worker(
2433 current_restore_flags_nested(&pflags, PF_FSTRANS); 2434 current_restore_flags_nested(&pflags, PF_FSTRANS);
2434} 2435}
2435 2436
2436 2437/*
2437int /* error */ 2438 * Data allocation requests often come in with little stack to work on. Push
2439 * them off to a worker thread so there is lots of stack to use. Metadata
2440 * requests, OTOH, are generally from low stack usage paths, so avoid the
2441 * context switch overhead here.
2442 */
2443int
2438xfs_alloc_vextent( 2444xfs_alloc_vextent(
2439 xfs_alloc_arg_t *args) /* allocation argument structure */ 2445 struct xfs_alloc_arg *args)
2440{ 2446{
2441 DECLARE_COMPLETION_ONSTACK(done); 2447 DECLARE_COMPLETION_ONSTACK(done);
2442 2448
2449 if (!args->userdata)
2450 return __xfs_alloc_vextent(args);
2451
2452
2443 args->done = &done; 2453 args->done = &done;
2444 INIT_WORK(&args->work, xfs_alloc_vextent_worker); 2454 INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
2445 queue_work(xfs_alloc_wq, &args->work); 2455 queue_work(xfs_alloc_wq, &args->work);
2446 wait_for_completion(&done); 2456 wait_for_completion(&done);
2447 return args->result; 2457 return args->result;
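
Review note: per the new comment, only userdata (file data) allocations are pushed to the workqueue for a fresh stack; metadata requests, arriving from shallow call chains, now run inline and skip the context switch. A pthread analogue of the dispatch-to-worker-and-wait pattern (a thread join stands in for the workqueue completion; invented names):

#include <pthread.h>
#include <stdio.h>

struct alloc_args {
        int userdata;   /* 1 = data allocation, 0 = metadata */
        int result;
};

static int do_alloc(struct alloc_args *a)
{
        a->result = 42;         /* the real work */
        return a->result;
}

static void *worker(void *p)
{
        do_alloc(p);            /* runs on the worker's fresh stack */
        return NULL;
}

static int alloc_vextent(struct alloc_args *a)
{
        pthread_t t;

        if (!a->userdata)
                return do_alloc(a);     /* metadata: stay inline */

        pthread_create(&t, NULL, worker, a);
        pthread_join(&t, NULL);         /* the "completion" */
        return a->result;
}

int main(void)
{
        struct alloc_args a = { .userdata = 1 };

        printf("result=%d\n", alloc_vextent(&a));
        return 0;
}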
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index ae31c313a79e..8dad722c0041 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -981,10 +981,15 @@ xfs_vm_writepage(
981 imap_valid = 0; 981 imap_valid = 0;
982 } 982 }
983 } else { 983 } else {
984 if (PageUptodate(page)) { 984 if (PageUptodate(page))
985 ASSERT(buffer_mapped(bh)); 985 ASSERT(buffer_mapped(bh));
986 imap_valid = 0; 986 /*
987 } 987 * This buffer is not uptodate and will not be
988 * written to disk. Ensure that we will put any
989 * subsequent writeable buffers into a new
990 * ioend.
991 */
992 imap_valid = 0;
988 continue; 993 continue;
989 } 994 }
990 995
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 172d3cc8f8cb..269b35c084da 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -201,14 +201,7 @@ xfs_buf_alloc(
201 bp->b_length = numblks; 201 bp->b_length = numblks;
202 bp->b_io_length = numblks; 202 bp->b_io_length = numblks;
203 bp->b_flags = flags; 203 bp->b_flags = flags;
204 204 bp->b_bn = blkno;
205 /*
206 * We do not set the block number here in the buffer because we have not
207 * finished initialising the buffer. We insert the buffer into the cache
208 * in this state, so this ensures that we are unable to do IO on a
209 * buffer that hasn't been fully initialised.
210 */
211 bp->b_bn = XFS_BUF_DADDR_NULL;
212 atomic_set(&bp->b_pin_count, 0); 205 atomic_set(&bp->b_pin_count, 0);
213 init_waitqueue_head(&bp->b_waiters); 206 init_waitqueue_head(&bp->b_waiters);
214 207
@@ -567,11 +560,6 @@ xfs_buf_get(
567 if (bp != new_bp) 560 if (bp != new_bp)
568 xfs_buf_free(new_bp); 561 xfs_buf_free(new_bp);
569 562
570 /*
571 * Now we have a workable buffer, fill in the block number so
572 * that we can do IO on it.
573 */
574 bp->b_bn = blkno;
575 bp->b_io_length = bp->b_length; 563 bp->b_io_length = bp->b_length;
576 564
577found: 565found:
@@ -772,7 +760,7 @@ xfs_buf_get_uncached(
772 int error, i; 760 int error, i;
773 xfs_buf_t *bp; 761 xfs_buf_t *bp;
774 762
775 bp = xfs_buf_alloc(target, 0, numblks, 0); 763 bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
776 if (unlikely(bp == NULL)) 764 if (unlikely(bp == NULL))
777 goto fail; 765 goto fail;
778 766
@@ -1001,27 +989,6 @@ xfs_buf_ioerror_alert(
1001 (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); 989 (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
1002} 990}
1003 991
1004int
1005xfs_bwrite(
1006 struct xfs_buf *bp)
1007{
1008 int error;
1009
1010 ASSERT(xfs_buf_islocked(bp));
1011
1012 bp->b_flags |= XBF_WRITE;
1013 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
1014
1015 xfs_bdstrat_cb(bp);
1016
1017 error = xfs_buf_iowait(bp);
1018 if (error) {
1019 xfs_force_shutdown(bp->b_target->bt_mount,
1020 SHUTDOWN_META_IO_ERROR);
1021 }
1022 return error;
1023}
1024
1025/* 992/*
1026 * Called when we want to stop a buffer from getting written or read. 993 * Called when we want to stop a buffer from getting written or read.
1027 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend 994 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1091,14 +1058,7 @@ xfs_bioerror_relse(
1091 return EIO; 1058 return EIO;
1092} 1059}
1093 1060
1094 1061STATIC int
1095/*
1096 * All xfs metadata buffers except log state machine buffers
1097 * get this attached as their b_bdstrat callback function.
1098 * This is so that we can catch a buffer
1099 * after prematurely unpinning it to forcibly shutdown the filesystem.
1100 */
1101int
1102xfs_bdstrat_cb( 1062xfs_bdstrat_cb(
1103 struct xfs_buf *bp) 1063 struct xfs_buf *bp)
1104{ 1064{
@@ -1119,6 +1079,27 @@ xfs_bdstrat_cb(
1119 return 0; 1079 return 0;
1120} 1080}
1121 1081
1082int
1083xfs_bwrite(
1084 struct xfs_buf *bp)
1085{
1086 int error;
1087
1088 ASSERT(xfs_buf_islocked(bp));
1089
1090 bp->b_flags |= XBF_WRITE;
1091 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
1092
1093 xfs_bdstrat_cb(bp);
1094
1095 error = xfs_buf_iowait(bp);
1096 if (error) {
1097 xfs_force_shutdown(bp->b_target->bt_mount,
1098 SHUTDOWN_META_IO_ERROR);
1099 }
1100 return error;
1101}
1102
1122/* 1103/*
1123 * Wrapper around bdstrat so that we can stop data from going to disk in case 1104 * Wrapper around bdstrat so that we can stop data from going to disk in case
1124 * we are shutting down the filesystem. Typically user data goes thru this 1105 * we are shutting down the filesystem. Typically user data goes thru this
@@ -1255,7 +1236,7 @@ xfs_buf_iorequest(
1255 */ 1236 */
1256 atomic_set(&bp->b_io_remaining, 1); 1237 atomic_set(&bp->b_io_remaining, 1);
1257 _xfs_buf_ioapply(bp); 1238 _xfs_buf_ioapply(bp);
1258 _xfs_buf_ioend(bp, 0); 1239 _xfs_buf_ioend(bp, 1);
1259 1240
1260 xfs_buf_rele(bp); 1241 xfs_buf_rele(bp);
1261} 1242}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d1392ce37..79344c48008e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
180extern int xfs_bwrite(struct xfs_buf *bp); 180extern int xfs_bwrite(struct xfs_buf *bp);
181 181
182extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 182extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
183extern int xfs_bdstrat_cb(struct xfs_buf *);
184 183
185extern void xfs_buf_ioend(xfs_buf_t *, int); 184extern void xfs_buf_ioend(xfs_buf_t *, int);
186extern void xfs_buf_ioerror(xfs_buf_t *, int); 185extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b857d48..d9e451115f98 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(
954 954
955 if (!XFS_BUF_ISSTALE(bp)) { 955 if (!XFS_BUF_ISSTALE(bp)) {
956 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; 956 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
957 xfs_bdstrat_cb(bp); 957 xfs_buf_iorequest(bp);
958 } else { 958 } else {
959 xfs_buf_relse(bp); 959 xfs_buf_relse(bp);
960 } 960 }
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6cdbf90c6f7b..d041d47d9d86 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -505,6 +505,14 @@ xfs_inode_item_push(
505 } 505 }
506 506
507 /* 507 /*
508 * Stale inode items should force out the iclog.
509 */
510 if (ip->i_flags & XFS_ISTALE) {
511 rval = XFS_ITEM_PINNED;
512 goto out_unlock;
513 }
514
515 /*
508 * Someone else is already flushing the inode. Nothing we can do 516 * Someone else is already flushing the inode. Nothing we can do
509 * here but wait for the flush to finish and remove the item from 517 * here but wait for the flush to finish and remove the item from
510 * the AIL. 518 * the AIL.
@@ -514,15 +522,6 @@ xfs_inode_item_push(
514 goto out_unlock; 522 goto out_unlock;
515 } 523 }
516 524
517 /*
518 * Stale inode items should force out the iclog.
519 */
520 if (ip->i_flags & XFS_ISTALE) {
521 xfs_ifunlock(ip);
522 xfs_iunlock(ip, XFS_ILOCK_SHARED);
523 return XFS_ITEM_PINNED;
524 }
525
526 ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); 525 ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
527 ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); 526 ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
528 527
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f30d9807dc48..d90d4a388609 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -38,13 +38,21 @@
38kmem_zone_t *xfs_log_ticket_zone; 38kmem_zone_t *xfs_log_ticket_zone;
39 39
40/* Local miscellaneous function prototypes */ 40/* Local miscellaneous function prototypes */
41STATIC int xlog_commit_record(struct log *log, struct xlog_ticket *ticket, 41STATIC int
42 xlog_in_core_t **, xfs_lsn_t *); 42xlog_commit_record(
43 struct xlog *log,
44 struct xlog_ticket *ticket,
45 struct xlog_in_core **iclog,
46 xfs_lsn_t *commitlsnp);
47
43STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, 48STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
44 xfs_buftarg_t *log_target, 49 xfs_buftarg_t *log_target,
45 xfs_daddr_t blk_offset, 50 xfs_daddr_t blk_offset,
46 int num_bblks); 51 int num_bblks);
47STATIC int xlog_space_left(struct log *log, atomic64_t *head); 52STATIC int
53xlog_space_left(
54 struct xlog *log,
55 atomic64_t *head);
48STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 56STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
49STATIC void xlog_dealloc_log(xlog_t *log); 57STATIC void xlog_dealloc_log(xlog_t *log);
50 58
@@ -64,8 +72,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log,
64 int eventual_size); 72 int eventual_size);
65STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); 73STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
66 74
67STATIC void xlog_grant_push_ail(struct log *log, 75STATIC void
68 int need_bytes); 76xlog_grant_push_ail(
77 struct xlog *log,
78 int need_bytes);
69STATIC void xlog_regrant_reserve_log_space(xlog_t *log, 79STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
70 xlog_ticket_t *ticket); 80 xlog_ticket_t *ticket);
71STATIC void xlog_ungrant_log_space(xlog_t *log, 81STATIC void xlog_ungrant_log_space(xlog_t *log,
@@ -73,7 +83,9 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
73 83
74#if defined(DEBUG) 84#if defined(DEBUG)
75STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); 85STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
76STATIC void xlog_verify_grant_tail(struct log *log); 86STATIC void
87xlog_verify_grant_tail(
88 struct xlog *log);
77STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, 89STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
78 int count, boolean_t syncing); 90 int count, boolean_t syncing);
79STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, 91STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
@@ -89,9 +101,9 @@ STATIC int xlog_iclogs_empty(xlog_t *log);
89 101
90static void 102static void
91xlog_grant_sub_space( 103xlog_grant_sub_space(
92 struct log *log, 104 struct xlog *log,
93 atomic64_t *head, 105 atomic64_t *head,
94 int bytes) 106 int bytes)
95{ 107{
96 int64_t head_val = atomic64_read(head); 108 int64_t head_val = atomic64_read(head);
97 int64_t new, old; 109 int64_t new, old;
@@ -115,9 +127,9 @@ xlog_grant_sub_space(
115 127
116static void 128static void
117xlog_grant_add_space( 129xlog_grant_add_space(
118 struct log *log, 130 struct xlog *log,
119 atomic64_t *head, 131 atomic64_t *head,
120 int bytes) 132 int bytes)
121{ 133{
122 int64_t head_val = atomic64_read(head); 134 int64_t head_val = atomic64_read(head);
123 int64_t new, old; 135 int64_t new, old;
@@ -165,7 +177,7 @@ xlog_grant_head_wake_all(
165 177
166static inline int 178static inline int
167xlog_ticket_reservation( 179xlog_ticket_reservation(
168 struct log *log, 180 struct xlog *log,
169 struct xlog_grant_head *head, 181 struct xlog_grant_head *head,
170 struct xlog_ticket *tic) 182 struct xlog_ticket *tic)
171{ 183{
@@ -182,7 +194,7 @@ xlog_ticket_reservation(
182 194
183STATIC bool 195STATIC bool
184xlog_grant_head_wake( 196xlog_grant_head_wake(
185 struct log *log, 197 struct xlog *log,
186 struct xlog_grant_head *head, 198 struct xlog_grant_head *head,
187 int *free_bytes) 199 int *free_bytes)
188{ 200{
@@ -204,7 +216,7 @@ xlog_grant_head_wake(
204 216
205STATIC int 217STATIC int
206xlog_grant_head_wait( 218xlog_grant_head_wait(
207 struct log *log, 219 struct xlog *log,
208 struct xlog_grant_head *head, 220 struct xlog_grant_head *head,
209 struct xlog_ticket *tic, 221 struct xlog_ticket *tic,
210 int need_bytes) 222 int need_bytes)
@@ -256,7 +268,7 @@ shutdown:
256 */ 268 */
257STATIC int 269STATIC int
258xlog_grant_head_check( 270xlog_grant_head_check(
259 struct log *log, 271 struct xlog *log,
260 struct xlog_grant_head *head, 272 struct xlog_grant_head *head,
261 struct xlog_ticket *tic, 273 struct xlog_ticket *tic,
262 int *need_bytes) 274 int *need_bytes)
@@ -323,7 +335,7 @@ xfs_log_regrant(
323 struct xfs_mount *mp, 335 struct xfs_mount *mp,
324 struct xlog_ticket *tic) 336 struct xlog_ticket *tic)
325{ 337{
326 struct log *log = mp->m_log; 338 struct xlog *log = mp->m_log;
327 int need_bytes; 339 int need_bytes;
328 int error = 0; 340 int error = 0;
329 341
@@ -389,7 +401,7 @@ xfs_log_reserve(
389 bool permanent, 401 bool permanent,
390 uint t_type) 402 uint t_type)
391{ 403{
392 struct log *log = mp->m_log; 404 struct xlog *log = mp->m_log;
393 struct xlog_ticket *tic; 405 struct xlog_ticket *tic;
394 int need_bytes; 406 int need_bytes;
395 int error = 0; 407 int error = 0;
@@ -465,7 +477,7 @@ xfs_log_done(
465 struct xlog_in_core **iclog, 477 struct xlog_in_core **iclog,
466 uint flags) 478 uint flags)
467{ 479{
468 struct log *log = mp->m_log; 480 struct xlog *log = mp->m_log;
469 xfs_lsn_t lsn = 0; 481 xfs_lsn_t lsn = 0;
470 482
471 if (XLOG_FORCED_SHUTDOWN(log) || 483 if (XLOG_FORCED_SHUTDOWN(log) ||
@@ -810,6 +822,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
810void 822void
811xfs_log_unmount(xfs_mount_t *mp) 823xfs_log_unmount(xfs_mount_t *mp)
812{ 824{
825 cancel_delayed_work_sync(&mp->m_sync_work);
813 xfs_trans_ail_destroy(mp); 826 xfs_trans_ail_destroy(mp);
814 xlog_dealloc_log(mp->m_log); 827 xlog_dealloc_log(mp->m_log);
815} 828}
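
The single line added to xfs_log_unmount() encodes a shutdown-ordering rule: the delayed sync work must be cancelled synchronously before the AIL and the log it operates on are torn down (the xfs_sync.c hunk further below relies on exactly this). Here is a pthread sketch of the same ordering, with the role of cancel_delayed_work_sync() played by a stop flag plus pthread_join(); all names are illustrative.

    /*
     * Sketch of "stop the background worker before freeing what it
     * uses". After the join returns, the worker can no longer touch
     * the log. Userspace analogy, not the kernel implementation.
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    struct mount {
        atomic_bool  active;    /* set once "mount" completes */
        atomic_bool  stopping;  /* tells the worker to exit */
        int         *log;       /* stand-in for mp->m_log */
    };

    static void *sync_worker(void *arg)
    {
        struct mount *mp = arg;

        while (!atomic_load(&mp->stopping)) {
            if (atomic_load(&mp->active))
                *mp->log += 1;  /* "force the log" */
            usleep(1000);       /* periodic wakeup */
        }
        return NULL;
    }

    int main(void)
    {
        struct mount mp = { .log = malloc(sizeof(int)) };
        pthread_t worker;

        *mp.log = 0;
        pthread_create(&worker, NULL, sync_worker, &mp);
        atomic_store(&mp.active, true);     /* mount finished */

        usleep(10000);                      /* filesystem in use */

        /* unmount: stop the worker *before* tearing the log down */
        atomic_store(&mp.stopping, true);
        pthread_join(worker, NULL);  /* cancel_delayed_work_sync() role */
        printf("log forced %d times\n", *mp.log);
        free(mp.log);                /* safe: no concurrent user left */
        return 0;
    }

If the free happened before the join, a still-running worker could dereference freed memory; that is the window the new cancel_delayed_work_sync() call closes.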
@@ -838,7 +851,7 @@ void
838xfs_log_space_wake( 851xfs_log_space_wake(
839 struct xfs_mount *mp) 852 struct xfs_mount *mp)
840{ 853{
841 struct log *log = mp->m_log; 854 struct xlog *log = mp->m_log;
842 int free_bytes; 855 int free_bytes;
843 856
844 if (XLOG_FORCED_SHUTDOWN(log)) 857 if (XLOG_FORCED_SHUTDOWN(log))
@@ -916,7 +929,7 @@ xfs_lsn_t
916xlog_assign_tail_lsn_locked( 929xlog_assign_tail_lsn_locked(
917 struct xfs_mount *mp) 930 struct xfs_mount *mp)
918{ 931{
919 struct log *log = mp->m_log; 932 struct xlog *log = mp->m_log;
920 struct xfs_log_item *lip; 933 struct xfs_log_item *lip;
921 xfs_lsn_t tail_lsn; 934 xfs_lsn_t tail_lsn;
922 935
@@ -965,7 +978,7 @@ xlog_assign_tail_lsn(
965 */ 978 */
966STATIC int 979STATIC int
967xlog_space_left( 980xlog_space_left(
968 struct log *log, 981 struct xlog *log,
969 atomic64_t *head) 982 atomic64_t *head)
970{ 983{
971 int free_bytes; 984 int free_bytes;
@@ -1277,7 +1290,7 @@ out:
1277 */ 1290 */
1278STATIC int 1291STATIC int
1279xlog_commit_record( 1292xlog_commit_record(
1280 struct log *log, 1293 struct xlog *log,
1281 struct xlog_ticket *ticket, 1294 struct xlog_ticket *ticket,
1282 struct xlog_in_core **iclog, 1295 struct xlog_in_core **iclog,
1283 xfs_lsn_t *commitlsnp) 1296 xfs_lsn_t *commitlsnp)
@@ -1311,7 +1324,7 @@ xlog_commit_record(
1311 */ 1324 */
1312STATIC void 1325STATIC void
1313xlog_grant_push_ail( 1326xlog_grant_push_ail(
1314 struct log *log, 1327 struct xlog *log,
1315 int need_bytes) 1328 int need_bytes)
1316{ 1329{
1317 xfs_lsn_t threshold_lsn = 0; 1330 xfs_lsn_t threshold_lsn = 0;
@@ -1790,7 +1803,7 @@ xlog_write_start_rec(
1790 1803
1791static xlog_op_header_t * 1804static xlog_op_header_t *
1792xlog_write_setup_ophdr( 1805xlog_write_setup_ophdr(
1793 struct log *log, 1806 struct xlog *log,
1794 struct xlog_op_header *ophdr, 1807 struct xlog_op_header *ophdr,
1795 struct xlog_ticket *ticket, 1808 struct xlog_ticket *ticket,
1796 uint flags) 1809 uint flags)
@@ -1873,7 +1886,7 @@ xlog_write_setup_copy(
1873 1886
1874static int 1887static int
1875xlog_write_copy_finish( 1888xlog_write_copy_finish(
1876 struct log *log, 1889 struct xlog *log,
1877 struct xlog_in_core *iclog, 1890 struct xlog_in_core *iclog,
1878 uint flags, 1891 uint flags,
1879 int *record_cnt, 1892 int *record_cnt,
@@ -1958,7 +1971,7 @@ xlog_write_copy_finish(
1958 */ 1971 */
1959int 1972int
1960xlog_write( 1973xlog_write(
1961 struct log *log, 1974 struct xlog *log,
1962 struct xfs_log_vec *log_vector, 1975 struct xfs_log_vec *log_vector,
1963 struct xlog_ticket *ticket, 1976 struct xlog_ticket *ticket,
1964 xfs_lsn_t *start_lsn, 1977 xfs_lsn_t *start_lsn,
@@ -2821,7 +2834,7 @@ _xfs_log_force(
2821 uint flags, 2834 uint flags,
2822 int *log_flushed) 2835 int *log_flushed)
2823{ 2836{
2824 struct log *log = mp->m_log; 2837 struct xlog *log = mp->m_log;
2825 struct xlog_in_core *iclog; 2838 struct xlog_in_core *iclog;
2826 xfs_lsn_t lsn; 2839 xfs_lsn_t lsn;
2827 2840
@@ -2969,7 +2982,7 @@ _xfs_log_force_lsn(
2969 uint flags, 2982 uint flags,
2970 int *log_flushed) 2983 int *log_flushed)
2971{ 2984{
2972 struct log *log = mp->m_log; 2985 struct xlog *log = mp->m_log;
2973 struct xlog_in_core *iclog; 2986 struct xlog_in_core *iclog;
2974 int already_slept = 0; 2987 int already_slept = 0;
2975 2988
@@ -3147,7 +3160,7 @@ xfs_log_ticket_get(
3147 */ 3160 */
3148xlog_ticket_t * 3161xlog_ticket_t *
3149xlog_ticket_alloc( 3162xlog_ticket_alloc(
3150 struct log *log, 3163 struct xlog *log,
3151 int unit_bytes, 3164 int unit_bytes,
3152 int cnt, 3165 int cnt,
3153 char client, 3166 char client,
@@ -3278,7 +3291,7 @@ xlog_ticket_alloc(
3278 */ 3291 */
3279void 3292void
3280xlog_verify_dest_ptr( 3293xlog_verify_dest_ptr(
3281 struct log *log, 3294 struct xlog *log,
3282 char *ptr) 3295 char *ptr)
3283{ 3296{
3284 int i; 3297 int i;
@@ -3307,7 +3320,7 @@ xlog_verify_dest_ptr(
3307 */ 3320 */
3308STATIC void 3321STATIC void
3309xlog_verify_grant_tail( 3322xlog_verify_grant_tail(
3310 struct log *log) 3323 struct xlog *log)
3311{ 3324{
3312 int tail_cycle, tail_blocks; 3325 int tail_cycle, tail_blocks;
3313 int cycle, space; 3326 int cycle, space;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 7d6197c58493..ddc4529d07d3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -44,7 +44,7 @@
44 */ 44 */
45static struct xlog_ticket * 45static struct xlog_ticket *
46xlog_cil_ticket_alloc( 46xlog_cil_ticket_alloc(
47 struct log *log) 47 struct xlog *log)
48{ 48{
49 struct xlog_ticket *tic; 49 struct xlog_ticket *tic;
50 50
@@ -72,7 +72,7 @@ xlog_cil_ticket_alloc(
72 */ 72 */
73void 73void
74xlog_cil_init_post_recovery( 74xlog_cil_init_post_recovery(
75 struct log *log) 75 struct xlog *log)
76{ 76{
77 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); 77 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
78 log->l_cilp->xc_ctx->sequence = 1; 78 log->l_cilp->xc_ctx->sequence = 1;
@@ -182,7 +182,7 @@ xlog_cil_prepare_log_vecs(
182 */ 182 */
183STATIC void 183STATIC void
184xfs_cil_prepare_item( 184xfs_cil_prepare_item(
185 struct log *log, 185 struct xlog *log,
186 struct xfs_log_vec *lv, 186 struct xfs_log_vec *lv,
187 int *len, 187 int *len,
188 int *diff_iovecs) 188 int *diff_iovecs)
@@ -231,7 +231,7 @@ xfs_cil_prepare_item(
231 */ 231 */
232static void 232static void
233xlog_cil_insert_items( 233xlog_cil_insert_items(
234 struct log *log, 234 struct xlog *log,
235 struct xfs_log_vec *log_vector, 235 struct xfs_log_vec *log_vector,
236 struct xlog_ticket *ticket) 236 struct xlog_ticket *ticket)
237{ 237{
@@ -373,7 +373,7 @@ xlog_cil_committed(
373 */ 373 */
374STATIC int 374STATIC int
375xlog_cil_push( 375xlog_cil_push(
376 struct log *log) 376 struct xlog *log)
377{ 377{
378 struct xfs_cil *cil = log->l_cilp; 378 struct xfs_cil *cil = log->l_cilp;
379 struct xfs_log_vec *lv; 379 struct xfs_log_vec *lv;
@@ -601,7 +601,7 @@ xlog_cil_push_work(
601 */ 601 */
602static void 602static void
603xlog_cil_push_background( 603xlog_cil_push_background(
604 struct log *log) 604 struct xlog *log)
605{ 605{
606 struct xfs_cil *cil = log->l_cilp; 606 struct xfs_cil *cil = log->l_cilp;
607 607
@@ -629,7 +629,7 @@ xlog_cil_push_background(
629 629
630static void 630static void
631xlog_cil_push_foreground( 631xlog_cil_push_foreground(
632 struct log *log, 632 struct xlog *log,
633 xfs_lsn_t push_seq) 633 xfs_lsn_t push_seq)
634{ 634{
635 struct xfs_cil *cil = log->l_cilp; 635 struct xfs_cil *cil = log->l_cilp;
@@ -683,7 +683,7 @@ xfs_log_commit_cil(
683 xfs_lsn_t *commit_lsn, 683 xfs_lsn_t *commit_lsn,
684 int flags) 684 int flags)
685{ 685{
686 struct log *log = mp->m_log; 686 struct xlog *log = mp->m_log;
687 int log_flags = 0; 687 int log_flags = 0;
688 struct xfs_log_vec *log_vector; 688 struct xfs_log_vec *log_vector;
689 689
@@ -754,7 +754,7 @@ xfs_log_commit_cil(
754 */ 754 */
755xfs_lsn_t 755xfs_lsn_t
756xlog_cil_force_lsn( 756xlog_cil_force_lsn(
757 struct log *log, 757 struct xlog *log,
758 xfs_lsn_t sequence) 758 xfs_lsn_t sequence)
759{ 759{
760 struct xfs_cil *cil = log->l_cilp; 760 struct xfs_cil *cil = log->l_cilp;
@@ -833,7 +833,7 @@ xfs_log_item_in_current_chkpt(
833 */ 833 */
834int 834int
835xlog_cil_init( 835xlog_cil_init(
836 struct log *log) 836 struct xlog *log)
837{ 837{
838 struct xfs_cil *cil; 838 struct xfs_cil *cil;
839 struct xfs_cil_ctx *ctx; 839 struct xfs_cil_ctx *ctx;
@@ -869,7 +869,7 @@ xlog_cil_init(
869 869
870void 870void
871xlog_cil_destroy( 871xlog_cil_destroy(
872 struct log *log) 872 struct xlog *log)
873{ 873{
874 if (log->l_cilp->xc_ctx) { 874 if (log->l_cilp->xc_ctx) {
875 if (log->l_cilp->xc_ctx->ticket) 875 if (log->l_cilp->xc_ctx->ticket)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5bc33261f5be..72eba2201b14 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -19,7 +19,7 @@
19#define __XFS_LOG_PRIV_H__ 19#define __XFS_LOG_PRIV_H__
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct log; 22struct xlog;
23struct xlog_ticket; 23struct xlog_ticket;
24struct xfs_mount; 24struct xfs_mount;
25 25
@@ -352,7 +352,7 @@ typedef struct xlog_in_core {
352 struct xlog_in_core *ic_next; 352 struct xlog_in_core *ic_next;
353 struct xlog_in_core *ic_prev; 353 struct xlog_in_core *ic_prev;
354 struct xfs_buf *ic_bp; 354 struct xfs_buf *ic_bp;
355 struct log *ic_log; 355 struct xlog *ic_log;
356 int ic_size; 356 int ic_size;
357 int ic_offset; 357 int ic_offset;
358 int ic_bwritecnt; 358 int ic_bwritecnt;
@@ -409,7 +409,7 @@ struct xfs_cil_ctx {
409 * operations almost as efficient as the old logging methods. 409 * operations almost as efficient as the old logging methods.
410 */ 410 */
411struct xfs_cil { 411struct xfs_cil {
412 struct log *xc_log; 412 struct xlog *xc_log;
413 struct list_head xc_cil; 413 struct list_head xc_cil;
414 spinlock_t xc_cil_lock; 414 spinlock_t xc_cil_lock;
415 struct xfs_cil_ctx *xc_ctx; 415 struct xfs_cil_ctx *xc_ctx;
@@ -487,7 +487,7 @@ struct xlog_grant_head {
487 * overflow 31 bits worth of byte offset, so using a byte number will mean 487 * overflow 31 bits worth of byte offset, so using a byte number will mean
488 * that round off problems won't occur when releasing partial reservations. 488 * that round off problems won't occur when releasing partial reservations.
489 */ 489 */
490typedef struct log { 490typedef struct xlog {
491 /* The following fields don't need locking */ 491 /* The following fields don't need locking */
492 struct xfs_mount *l_mp; /* mount point */ 492 struct xfs_mount *l_mp; /* mount point */
493 struct xfs_ail *l_ailp; /* AIL log is working with */ 493 struct xfs_ail *l_ailp; /* AIL log is working with */
@@ -553,9 +553,14 @@ extern int xlog_recover_finish(xlog_t *log);
553extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); 553extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
554 554
555extern kmem_zone_t *xfs_log_ticket_zone; 555extern kmem_zone_t *xfs_log_ticket_zone;
556struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, 556struct xlog_ticket *
557 int count, char client, bool permanent, 557xlog_ticket_alloc(
558 xfs_km_flags_t alloc_flags); 558 struct xlog *log,
559 int unit_bytes,
560 int count,
561 char client,
562 bool permanent,
563 xfs_km_flags_t alloc_flags);
559 564
560 565
561static inline void 566static inline void
@@ -567,9 +572,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
567} 572}
568 573
569void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); 574void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
570int xlog_write(struct log *log, struct xfs_log_vec *log_vector, 575int
571 struct xlog_ticket *tic, xfs_lsn_t *start_lsn, 576xlog_write(
572 xlog_in_core_t **commit_iclog, uint flags); 577 struct xlog *log,
578 struct xfs_log_vec *log_vector,
579 struct xlog_ticket *tic,
580 xfs_lsn_t *start_lsn,
581 struct xlog_in_core **commit_iclog,
582 uint flags);
573 583
574/* 584/*
575 * When we crack an atomic LSN, we sample it first so that the value will not 585 * When we crack an atomic LSN, we sample it first so that the value will not
@@ -629,17 +639,23 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
629/* 639/*
630 * Committed Item List interfaces 640 * Committed Item List interfaces
631 */ 641 */
632int xlog_cil_init(struct log *log); 642int
633void xlog_cil_init_post_recovery(struct log *log); 643xlog_cil_init(struct xlog *log);
634void xlog_cil_destroy(struct log *log); 644void
645xlog_cil_init_post_recovery(struct xlog *log);
646void
647xlog_cil_destroy(struct xlog *log);
635 648
636/* 649/*
637 * CIL force routines 650 * CIL force routines
638 */ 651 */
639xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); 652xfs_lsn_t
653xlog_cil_force_lsn(
654 struct xlog *log,
655 xfs_lsn_t sequence);
640 656
641static inline void 657static inline void
642xlog_cil_force(struct log *log) 658xlog_cil_force(struct xlog *log)
643{ 659{
644 xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); 660 xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
645} 661}
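
xfs_log_priv.h also carries the rule partially visible in the hunk above: when cracking an atomic LSN, the value is sampled first so it cannot change while being split into its cycle and block halves. A short sketch of why the single sample matters; the CYCLE_LSN/BLOCK_LSN macros here are simplified models of the real ones.

    /*
     * "Sample first, then crack": an LSN packs a cycle and a block
     * number into one 64-bit value. Two separate atomic loads could
     * mix fields from two different LSNs; one load into a local copy
     * cannot. Illustrative macros, not the fs/xfs definitions.
     */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CYCLE_LSN(lsn)  ((uint32_t)((lsn) >> 32))
    #define BLOCK_LSN(lsn)  ((uint32_t)(lsn))

    /* WRONG: two loads, fields may come from different stored values */
    static void crack_torn(_Atomic int64_t *lsn, uint32_t *cycle,
                           uint32_t *block)
    {
        *cycle = CYCLE_LSN(atomic_load(lsn));
        *block = BLOCK_LSN(atomic_load(lsn));  /* lsn may have changed */
    }

    /* RIGHT: one load into a local, then crack the stable copy */
    static void crack_sampled(_Atomic int64_t *lsn, uint32_t *cycle,
                              uint32_t *block)
    {
        int64_t val = atomic_load(lsn);

        *cycle = CYCLE_LSN(val);
        *block = BLOCK_LSN(val);
    }

    int main(void)
    {
        _Atomic int64_t lsn = ((int64_t)7 << 32) | 1234;
        uint32_t cycle, block;

        crack_torn(&lsn, &cycle, &block);    /* racy if lsn were shared */
        crack_sampled(&lsn, &cycle, &block);
        printf("cycle=%u block=%u\n", cycle, block);
        return 0;
    }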
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ca386909131a..a7be98abd6a9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1471,8 +1471,8 @@ xlog_recover_add_item(
1471 1471
1472STATIC int 1472STATIC int
1473xlog_recover_add_to_cont_trans( 1473xlog_recover_add_to_cont_trans(
1474 struct log *log, 1474 struct xlog *log,
1475 xlog_recover_t *trans, 1475 struct xlog_recover *trans,
1476 xfs_caddr_t dp, 1476 xfs_caddr_t dp,
1477 int len) 1477 int len)
1478{ 1478{
@@ -1517,8 +1517,8 @@ xlog_recover_add_to_cont_trans(
1517 */ 1517 */
1518STATIC int 1518STATIC int
1519xlog_recover_add_to_trans( 1519xlog_recover_add_to_trans(
1520 struct log *log, 1520 struct xlog *log,
1521 xlog_recover_t *trans, 1521 struct xlog_recover *trans,
1522 xfs_caddr_t dp, 1522 xfs_caddr_t dp,
1523 int len) 1523 int len)
1524{ 1524{
@@ -1588,8 +1588,8 @@ xlog_recover_add_to_trans(
1588 */ 1588 */
1589STATIC int 1589STATIC int
1590xlog_recover_reorder_trans( 1590xlog_recover_reorder_trans(
1591 struct log *log, 1591 struct xlog *log,
1592 xlog_recover_t *trans, 1592 struct xlog_recover *trans,
1593 int pass) 1593 int pass)
1594{ 1594{
1595 xlog_recover_item_t *item, *n; 1595 xlog_recover_item_t *item, *n;
@@ -1642,8 +1642,8 @@ xlog_recover_reorder_trans(
1642 */ 1642 */
1643STATIC int 1643STATIC int
1644xlog_recover_buffer_pass1( 1644xlog_recover_buffer_pass1(
1645 struct log *log, 1645 struct xlog *log,
1646 xlog_recover_item_t *item) 1646 struct xlog_recover_item *item)
1647{ 1647{
1648 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 1648 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1649 struct list_head *bucket; 1649 struct list_head *bucket;
@@ -1696,7 +1696,7 @@ xlog_recover_buffer_pass1(
1696 */ 1696 */
1697STATIC int 1697STATIC int
1698xlog_check_buffer_cancelled( 1698xlog_check_buffer_cancelled(
1699 struct log *log, 1699 struct xlog *log,
1700 xfs_daddr_t blkno, 1700 xfs_daddr_t blkno,
1701 uint len, 1701 uint len,
1702 ushort flags) 1702 ushort flags)
@@ -2689,9 +2689,9 @@ xlog_recover_free_trans(
2689 2689
2690STATIC int 2690STATIC int
2691xlog_recover_commit_pass1( 2691xlog_recover_commit_pass1(
2692 struct log *log, 2692 struct xlog *log,
2693 struct xlog_recover *trans, 2693 struct xlog_recover *trans,
2694 xlog_recover_item_t *item) 2694 struct xlog_recover_item *item)
2695{ 2695{
2696 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); 2696 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
2697 2697
@@ -2716,10 +2716,10 @@ xlog_recover_commit_pass1(
2716 2716
2717STATIC int 2717STATIC int
2718xlog_recover_commit_pass2( 2718xlog_recover_commit_pass2(
2719 struct log *log, 2719 struct xlog *log,
2720 struct xlog_recover *trans, 2720 struct xlog_recover *trans,
2721 struct list_head *buffer_list, 2721 struct list_head *buffer_list,
2722 xlog_recover_item_t *item) 2722 struct xlog_recover_item *item)
2723{ 2723{
2724 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); 2724 trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
2725 2725
@@ -2753,7 +2753,7 @@ xlog_recover_commit_pass2(
2753 */ 2753 */
2754STATIC int 2754STATIC int
2755xlog_recover_commit_trans( 2755xlog_recover_commit_trans(
2756 struct log *log, 2756 struct xlog *log,
2757 struct xlog_recover *trans, 2757 struct xlog_recover *trans,
2758 int pass) 2758 int pass)
2759{ 2759{
@@ -2793,8 +2793,8 @@ out:
2793 2793
2794STATIC int 2794STATIC int
2795xlog_recover_unmount_trans( 2795xlog_recover_unmount_trans(
2796 struct log *log, 2796 struct xlog *log,
2797 xlog_recover_t *trans) 2797 struct xlog_recover *trans)
2798{ 2798{
2799 /* Do nothing now */ 2799 /* Do nothing now */
2800 xfs_warn(log->l_mp, "%s: Unmount LR", __func__); 2800 xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8b89c5ac72d9..90c1fc9eaea4 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,7 +53,7 @@ typedef struct xfs_trans_reservations {
53 53
54#include "xfs_sync.h" 54#include "xfs_sync.h"
55 55
56struct log; 56struct xlog;
57struct xfs_mount_args; 57struct xfs_mount_args;
58struct xfs_inode; 58struct xfs_inode;
59struct xfs_bmbt_irec; 59struct xfs_bmbt_irec;
@@ -133,7 +133,7 @@ typedef struct xfs_mount {
133 uint m_readio_blocks; /* min read size blocks */ 133 uint m_readio_blocks; /* min read size blocks */
134 uint m_writeio_log; /* min write size log bytes */ 134 uint m_writeio_log; /* min write size log bytes */
135 uint m_writeio_blocks; /* min write size blocks */ 135 uint m_writeio_blocks; /* min write size blocks */
136 struct log *m_log; /* log specific stuff */ 136 struct xlog *m_log; /* log specific stuff */
137 int m_logbufs; /* number of log buffers */ 137 int m_logbufs; /* number of log buffers */
138 int m_logbsize; /* size of each log buffer */ 138 int m_logbsize; /* size of each log buffer */
139 uint m_rsumlevels; /* rt summary levels */ 139 uint m_rsumlevels; /* rt summary levels */
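
The xfs_mount.h hunk shows why this rename stays header-deep: C allows a struct to be declared without a definition, and pointers to such an incomplete type are legal, so xfs_mount.h never needs the full xlog layout. A condensed illustration, with the two "headers" collapsed into one translation unit; the names are invented for the example.

    /*
     * Why 'struct xlog;' is enough in xfs_mount.h: only code that
     * dereferences the pointer needs the definition. Illustration,
     * not the real headers.
     */
    #include <stdio.h>
    #include <stdlib.h>

    /* --- the xfs_mount.h side: forward declaration only --- */
    struct xlog;                    /* incomplete type */

    struct mount_like {
        struct xlog *m_log;         /* pointer is fine without layout */
    };

    /* --- the xfs_log_priv.h side: the actual definition --- */
    struct xlog {
        int l_logsize;
    };

    /* --- the xfs_log.c side: only this code needs the layout --- */
    int main(void)
    {
        struct mount_like mp;

        mp.m_log = malloc(sizeof(struct xlog));
        mp.m_log->l_logsize = 16 << 20;
        printf("logsize=%d\n", mp.m_log->l_logsize);
        free(mp.m_log);
        return 0;
    }

Because only declarations and prototypes mention the tag, renaming it from log to xlog is wide but mechanical, which matches the pattern of every hunk in this series.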
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index c9d3409c5ca3..1e9ee064dbb2 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -386,23 +386,23 @@ xfs_sync_worker(
386 * We shouldn't write/force the log if we are in the mount/unmount 386 * We shouldn't write/force the log if we are in the mount/unmount
387 * process or on a read only filesystem. The workqueue still needs to be 387 * process or on a read only filesystem. The workqueue still needs to be
388 * active in both cases, however, because it is used for inode reclaim 388 * active in both cases, however, because it is used for inode reclaim
389 * during these times. Use the s_umount semaphore to provide exclusion 389 * during these times. Use the MS_ACTIVE flag to avoid doing anything
390 * with unmount. 390 * during mount. Doing work during unmount is avoided by calling
391 * cancel_delayed_work_sync on this work queue before tearing down
392 * the ail and the log in xfs_log_unmount.
391 */ 393 */
392 if (down_read_trylock(&mp->m_super->s_umount)) { 394 if (!(mp->m_super->s_flags & MS_ACTIVE) &&
393 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { 395 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
394 /* dgc: errors ignored here */ 396 /* dgc: errors ignored here */
395 if (mp->m_super->s_frozen == SB_UNFROZEN && 397 if (mp->m_super->s_frozen == SB_UNFROZEN &&
396 xfs_log_need_covered(mp)) 398 xfs_log_need_covered(mp))
397 error = xfs_fs_log_dummy(mp); 399 error = xfs_fs_log_dummy(mp);
398 else 400 else
399 xfs_log_force(mp, 0); 401 xfs_log_force(mp, 0);
400 402
401 /* start pushing all the metadata that is currently 403 /* start pushing all the metadata that is currently
402 * dirty */ 404 * dirty */
403 xfs_ail_push_all(mp->m_ail); 405 xfs_ail_push_all(mp->m_ail);
404 }
405 up_read(&mp->m_super->s_umount);
406 } 406 }
407 407
408 /* queue us up again */ 408 /* queue us up again */
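
The surviving body of xfs_sync_worker() makes a per-tick decision: when the filesystem is not frozen, either write a dummy record to cover an idle log (xfs_log_need_covered()/xfs_fs_log_dummy()) or simply force outstanding changes with xfs_log_force(). The following is a heavily simplified model of that covering progression; the real machinery in xfs_log.c tracks several XLOG_STATE_COVER_* states, so treat this as a sketch of the idea only.

    /*
     * Toy model of periodic log covering: once the log goes quiet, a
     * dummy record moves the tail forward so crash recovery has
     * nothing to replay. Invented states, not the kernel's.
     */
    #include <stdio.h>

    enum cover_state { LOG_DIRTY, LOG_NEED_COVER, LOG_COVERED };

    /* one periodic tick, mirroring the decision in the hunk above */
    static enum cover_state sync_tick(enum cover_state st, int frozen)
    {
        if (frozen)
            return st;              /* SB_UNFROZEN check: do nothing */
        switch (st) {
        case LOG_DIRTY:
            /* xfs_log_force(): push changes, log becomes quiet */
            return LOG_NEED_COVER;
        case LOG_NEED_COVER:
            /* xfs_log_need_covered(): write a dummy record */
            return LOG_COVERED;     /* via xfs_fs_log_dummy() */
        case LOG_COVERED:
            return LOG_COVERED;     /* idle, nothing left to do */
        }
        return st;
    }

    int main(void)
    {
        enum cover_state st = LOG_DIRTY;

        for (int tick = 0; tick < 3; tick++) {
            st = sync_tick(st, 0);
            printf("tick %d -> state %d\n", tick, st);
        }
        return 0;
    }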
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7cf9d3529e51..caf5dabfd553 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -32,7 +32,7 @@ struct xfs_da_node_entry;
32struct xfs_dquot; 32struct xfs_dquot;
33struct xfs_log_item; 33struct xfs_log_item;
34struct xlog_ticket; 34struct xlog_ticket;
35struct log; 35struct xlog;
36struct xlog_recover; 36struct xlog_recover;
37struct xlog_recover_item; 37struct xlog_recover_item;
38struct xfs_buf_log_format; 38struct xfs_buf_log_format;
@@ -762,7 +762,7 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_force);
762DEFINE_DQUOT_EVENT(xfs_dqflush_done); 762DEFINE_DQUOT_EVENT(xfs_dqflush_done);
763 763
764DECLARE_EVENT_CLASS(xfs_loggrant_class, 764DECLARE_EVENT_CLASS(xfs_loggrant_class,
765 TP_PROTO(struct log *log, struct xlog_ticket *tic), 765 TP_PROTO(struct xlog *log, struct xlog_ticket *tic),
766 TP_ARGS(log, tic), 766 TP_ARGS(log, tic),
767 TP_STRUCT__entry( 767 TP_STRUCT__entry(
768 __field(dev_t, dev) 768 __field(dev_t, dev)
@@ -830,7 +830,7 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
830 830
831#define DEFINE_LOGGRANT_EVENT(name) \ 831#define DEFINE_LOGGRANT_EVENT(name) \
832DEFINE_EVENT(xfs_loggrant_class, name, \ 832DEFINE_EVENT(xfs_loggrant_class, name, \
833 TP_PROTO(struct log *log, struct xlog_ticket *tic), \ 833 TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \
834 TP_ARGS(log, tic)) 834 TP_ARGS(log, tic))
835DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 835DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
836DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 836DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
@@ -1664,7 +1664,7 @@ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
1664DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); 1664DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
1665 1665
1666DECLARE_EVENT_CLASS(xfs_log_recover_item_class, 1666DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
1667 TP_PROTO(struct log *log, struct xlog_recover *trans, 1667 TP_PROTO(struct xlog *log, struct xlog_recover *trans,
1668 struct xlog_recover_item *item, int pass), 1668 struct xlog_recover_item *item, int pass),
1669 TP_ARGS(log, trans, item, pass), 1669 TP_ARGS(log, trans, item, pass),
1670 TP_STRUCT__entry( 1670 TP_STRUCT__entry(
@@ -1698,7 +1698,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
1698 1698
1699#define DEFINE_LOG_RECOVER_ITEM(name) \ 1699#define DEFINE_LOG_RECOVER_ITEM(name) \
1700DEFINE_EVENT(xfs_log_recover_item_class, name, \ 1700DEFINE_EVENT(xfs_log_recover_item_class, name, \
1701 TP_PROTO(struct log *log, struct xlog_recover *trans, \ 1701 TP_PROTO(struct xlog *log, struct xlog_recover *trans, \
1702 struct xlog_recover_item *item, int pass), \ 1702 struct xlog_recover_item *item, int pass), \
1703 TP_ARGS(log, trans, item, pass)) 1703 TP_ARGS(log, trans, item, pass))
1704 1704
@@ -1709,7 +1709,7 @@ DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
1709DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); 1709DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
1710 1710
1711DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, 1711DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
1712 TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), 1712 TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f),
1713 TP_ARGS(log, buf_f), 1713 TP_ARGS(log, buf_f),
1714 TP_STRUCT__entry( 1714 TP_STRUCT__entry(
1715 __field(dev_t, dev) 1715 __field(dev_t, dev)
@@ -1739,7 +1739,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
1739 1739
1740#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ 1740#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
1741DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ 1741DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
1742 TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ 1742 TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), \
1743 TP_ARGS(log, buf_f)) 1743 TP_ARGS(log, buf_f))
1744 1744
1745DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); 1745DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
@@ -1752,7 +1752,7 @@ DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
1752DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); 1752DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
1753 1753
1754DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, 1754DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
1755 TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), 1755 TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f),
1756 TP_ARGS(log, in_f), 1756 TP_ARGS(log, in_f),
1757 TP_STRUCT__entry( 1757 TP_STRUCT__entry(
1758 __field(dev_t, dev) 1758 __field(dev_t, dev)
@@ -1790,7 +1790,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
1790) 1790)
1791#define DEFINE_LOG_RECOVER_INO_ITEM(name) \ 1791#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
1792DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ 1792DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
1793 TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ 1793 TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), \
1794 TP_ARGS(log, in_f)) 1794 TP_ARGS(log, in_f))
1795 1795
1796DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); 1796DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
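
The xfs_trace.h hunks touch only TP_PROTO() signatures because DECLARE_EVENT_CLASS() factors one tracepoint implementation out of many DEFINE_EVENT() instances, so the type rename lands once per class and once per define-macro rather than once per event. A userspace analogue of that class/instance split; the struct fields below are invented for illustration, while the event names are taken from the diff.

    /*
     * Analogue of DECLARE_EVENT_CLASS/DEFINE_EVENT: one "class"
     * provides the shared logic, and a macro stamps out thin,
     * individually named entry points. Illustrative only.
     */
    #include <stdio.h>

    struct xlog { int l_dev; };
    struct xlog_ticket { int t_unit_res; };

    /* the "event class": shared formatting, like DECLARE_EVENT_CLASS */
    static void loggrant_class(const char *event, struct xlog *log,
                               struct xlog_ticket *tic)
    {
        printf("%s: dev=%d unit_res=%d\n",
               event, log->l_dev, tic->t_unit_res);
    }

    /* stamp out named events sharing the class, like DEFINE_EVENT */
    #define DEFINE_LOGGRANT_EVENT(name)                                 \
    static void trace_##name(struct xlog *log, struct xlog_ticket *tic) \
    {                                                                   \
        loggrant_class(#name, log, tic);                                \
    }

    DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm)
    DEFINE_LOGGRANT_EVENT(xfs_log_done_perm)

    int main(void)
    {
        struct xlog log = { .l_dev = 8 };
        struct xlog_ticket tic = { .t_unit_res = 2048 };

        trace_xfs_log_done_nonperm(&log, &tic);
        trace_xfs_log_done_perm(&log, &tic);
        return 0;
    }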