Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/flock.c                   |   4
-rw-r--r--  fs/afs/inode.c                   |   3
-rw-r--r--  fs/afs/protocol_yfs.h            |  11
-rw-r--r--  fs/afs/rxrpc.c                   |  53
-rw-r--r--  fs/afs/server_list.c             |   4
-rw-r--r--  fs/afs/yfsclient.c               |   2
-rw-r--r--  fs/aio.c                         |   1
-rw-r--r--  fs/autofs/expire.c               |   3
-rw-r--r--  fs/autofs/inode.c                |   4
-rw-r--r--  fs/binfmt_script.c               |  57
-rw-r--r--  fs/block_dev.c                   |  28
-rw-r--r--  fs/btrfs/ctree.c                 |  76
-rw-r--r--  fs/btrfs/ctree.h                 |   7
-rw-r--r--  fs/btrfs/disk-io.c               |  12
-rw-r--r--  fs/btrfs/extent-tree.c           |  21
-rw-r--r--  fs/btrfs/inode.c                 |   5
-rw-r--r--  fs/btrfs/ioctl.c                 |  49
-rw-r--r--  fs/btrfs/super.c                 |   3
-rw-r--r--  fs/btrfs/transaction.c           |  24
-rw-r--r--  fs/btrfs/volumes.c               |  16
-rw-r--r--  fs/buffer.c                      |  19
-rw-r--r--  fs/ceph/addr.c                   |   5
-rw-r--r--  fs/ceph/caps.c                   |   2
-rw-r--r--  fs/ceph/quota.c                  |  13
-rw-r--r--  fs/ceph/snap.c                   |   3
-rw-r--r--  fs/ceph/super.c                  |   4
-rw-r--r--  fs/cifs/cifs_debug.c             |   1
-rw-r--r--  fs/cifs/cifsfs.h                 |   2
-rw-r--r--  fs/cifs/cifsglob.h               |  20
-rw-r--r--  fs/cifs/cifssmb.c                |  65
-rw-r--r--  fs/cifs/connect.c                |  28
-rw-r--r--  fs/cifs/dfs_cache.c              |   1
-rw-r--r--  fs/cifs/file.c                   |  56
-rw-r--r--  fs/cifs/inode.c                  |  10
-rw-r--r--  fs/cifs/smb2file.c               |   8
-rw-r--r--  fs/cifs/smb2inode.c              |  17
-rw-r--r--  fs/cifs/smb2misc.c               |   7
-rw-r--r--  fs/cifs/smb2ops.c                |  72
-rw-r--r--  fs/cifs/smb2pdu.c                | 108
-rw-r--r--  fs/cifs/smb2pdu.h                |  19
-rw-r--r--  fs/cifs/trace.c                  |  10
-rw-r--r--  fs/cifs/trace.h                  |  10
-rw-r--r--  fs/cifs/transport.c              | 113
-rw-r--r--  fs/dcache.c                      |  38
-rw-r--r--  fs/debugfs/inode.c               |  36
-rw-r--r--  fs/direct-io.c                   |   5
-rw-r--r--  fs/drop_caches.c                 |   8
-rw-r--r--  fs/ext4/fsync.c                  |  13
-rw-r--r--  fs/fs-writeback.c                |  40
-rw-r--r--  fs/fuse/dev.c                    |   4
-rw-r--r--  fs/fuse/file.c                   |   2
-rw-r--r--  fs/fuse/inode.c                  |   2
-rw-r--r--  fs/gfs2/glops.c                  |   1
-rw-r--r--  fs/gfs2/log.c                    |   4
-rw-r--r--  fs/gfs2/lops.c                   | 190
-rw-r--r--  fs/gfs2/lops.h                   |   4
-rw-r--r--  fs/gfs2/ops_fstype.c             |   1
-rw-r--r--  fs/gfs2/recovery.c               | 123
-rw-r--r--  fs/gfs2/recovery.h               |   2
-rw-r--r--  fs/gfs2/rgrp.c                   |   2
-rw-r--r--  fs/gfs2/super.c                  |   1
-rw-r--r--  fs/hugetlbfs/inode.c             |  61
-rw-r--r--  fs/inode.c                       |   7
-rw-r--r--  fs/iomap.c                       |  37
-rw-r--r--  fs/nfs/nfs4file.c                |   8
-rw-r--r--  fs/nfs/nfs4idmap.c               |  31
-rw-r--r--  fs/nfs/super.c                   |   5
-rw-r--r--  fs/nfs/write.c                   |  20
-rw-r--r--  fs/nfsd/nfsctl.c                 |   4
-rw-r--r--  fs/nfsd/vfs.c                    |   6
-rw-r--r--  fs/notify/inotify/inotify_user.c |   6
-rw-r--r--  fs/proc/base.c                   |   4
-rw-r--r--  fs/proc/generic.c                |   4
-rw-r--r--  fs/proc/internal.h               |   1
-rw-r--r--  fs/proc/proc_net.c               |  20
-rw-r--r--  fs/proc/task_mmu.c               |  22
-rw-r--r--  fs/pstore/ram.c                  |  12
-rw-r--r--  fs/sysfs/dir.c                   |   3
-rw-r--r--  fs/sysfs/file.c                  |   6
-rw-r--r--  fs/sysfs/group.c                 |   3
-rw-r--r--  fs/sysfs/symlink.c               |   3
-rw-r--r--  fs/xfs/scrub/repair.c            |  11
-rw-r--r--  fs/xfs/xfs_aops.c                |   2
-rw-r--r--  fs/xfs/xfs_buf.c                 |  19
84 files changed, 1141 insertions(+), 606 deletions(-)
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 0568fd986821..e432bd27a2e7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -208,7 +208,7 @@ again:
 	/* The new front of the queue now owns the state variables. */
 	next = list_entry(vnode->pending_locks.next,
 			  struct file_lock, fl_u.afs.link);
-	vnode->lock_key = afs_file_key(next->fl_file);
+	vnode->lock_key = key_get(afs_file_key(next->fl_file));
 	vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
 	vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
 	goto again;
@@ -413,7 +413,7 @@ static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
 	/* The new front of the queue now owns the state variables. */
 	next = list_entry(vnode->pending_locks.next,
 			  struct file_lock, fl_u.afs.link);
-	vnode->lock_key = afs_file_key(next->fl_file);
+	vnode->lock_key = key_get(afs_file_key(next->fl_file));
 	vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
 	vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
 	afs_lock_may_be_available(vnode);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b17d3620414..1a4ce07fb406 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -414,7 +414,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		valid = true;
 	} else {
-		vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
 		vnode->cb_v_break = vnode->volume->cb_v_break;
 		valid = false;
 	}
@@ -546,6 +545,8 @@ void afs_evict_inode(struct inode *inode)
 #endif
 
 	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
 	_leave("");
 }
 
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index 07bc10f076aa..d443e2bfa094 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -161,3 +161,14 @@ struct yfs_xdr_YFSStoreVolumeStatus {
 	struct yfs_xdr_u64	max_quota;
 	struct yfs_xdr_u64	file_quota;
 } __packed;
+
+enum yfs_lock_type {
+	yfs_LockNone		= -1,
+	yfs_LockRead		= 0,
+	yfs_LockWrite		= 1,
+	yfs_LockExtend		= 2,
+	yfs_LockRelease		= 3,
+	yfs_LockMandatoryRead	= 0x100,
+	yfs_LockMandatoryWrite	= 0x101,
+	yfs_LockMandatoryExtend	= 0x102,
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a7b44863d502..2c588f9bbbda 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -23,6 +23,7 @@ struct workqueue_struct *afs_async_calls;
 static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
 static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
 static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_delete_async_call(struct work_struct *);
 static void afs_process_async_call(struct work_struct *);
 static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
 static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -203,20 +204,26 @@ void afs_put_call(struct afs_call *call)
 	}
 }
 
+static struct afs_call *afs_get_call(struct afs_call *call,
+				     enum afs_call_trace why)
+{
+	int u = atomic_inc_return(&call->usage);
+
+	trace_afs_call(call, why, u,
+		       atomic_read(&call->net->nr_outstanding_calls),
+		       __builtin_return_address(0));
+	return call;
+}
+
 /*
  * Queue the call for actual work.
  */
 static void afs_queue_call_work(struct afs_call *call)
 {
 	if (call->type->work) {
-		int u = atomic_inc_return(&call->usage);
-
-		trace_afs_call(call, afs_call_trace_work, u,
-			       atomic_read(&call->net->nr_outstanding_calls),
-			       __builtin_return_address(0));
-
 		INIT_WORK(&call->work, call->type->work);
 
+		afs_get_call(call, afs_call_trace_work);
 		if (!queue_work(afs_wq, &call->work))
 			afs_put_call(call);
 	}
@@ -398,6 +405,12 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 		}
 	}
 
+	/* If the call is going to be asynchronous, we need an extra ref for
+	 * the call to hold itself so the caller need not hang on to its ref.
+	 */
+	if (call->async)
+		afs_get_call(call, afs_call_trace_get);
+
 	/* create a call */
 	rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
 					 (unsigned long)call,
@@ -438,15 +451,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 		goto error_do_abort;
 	}
 
-	/* at this point, an async call may no longer exist as it may have
-	 * already completed */
-	if (call->async)
+	/* Note that at this point, we may have received the reply or an abort
+	 * - and an asynchronous call may already have completed.
+	 */
+	if (call->async) {
+		afs_put_call(call);
 		return -EINPROGRESS;
+	}
 
 	return afs_wait_for_call_to_complete(call, ac);
 
 error_do_abort:
-	call->state = AFS_CALL_COMPLETE;
 	if (ret != -ECONNABORTED) {
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
 					RX_USER_ABORT, ret, "KSD");
@@ -463,8 +478,24 @@ error_do_abort:
 error_kill_call:
 	if (call->type->done)
 		call->type->done(call);
-	afs_put_call(call);
+
+	/* We need to dispose of the extra ref we grabbed for an async call.
+	 * The call, however, might be queued on afs_async_calls and we need to
+	 * make sure we don't get any more notifications that might requeue it.
+	 */
+	if (call->rxcall) {
+		rxrpc_kernel_end_call(call->net->socket, call->rxcall);
+		call->rxcall = NULL;
+	}
+	if (call->async) {
+		if (cancel_work_sync(&call->async_work))
+			afs_put_call(call);
+		afs_put_call(call);
+	}
+
 	ac->error = ret;
+	call->state = AFS_CALL_COMPLETE;
+	afs_put_call(call);
 	_leave(" = %d", ret);
 	return ret;
 }
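
The refactor above pulls reference acquisition into a single afs_get_call() helper and, for asynchronous calls, takes an extra reference that the call holds on itself, so afs_make_call() can return -EINPROGRESS while the completion path (or the error path's cancel_work_sync()) drops that reference. A minimal userspace sketch of this ownership pattern, with illustrative names rather than the kernel API:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct call {
		atomic_int usage;
	};

	/* Take an extra reference on behalf of the async work itself. */
	static struct call *get_call(struct call *c)
	{
		atomic_fetch_add(&c->usage, 1);
		return c;
	}

	/* Drop one reference; the last one frees the call. */
	static void put_call(struct call *c)
	{
		if (atomic_fetch_sub(&c->usage, 1) == 1) {
			printf("last reference dropped, freeing call\n");
			free(c);
		}
	}

	int main(void)
	{
		struct call *c = malloc(sizeof(*c));

		if (!c)
			return 1;
		atomic_init(&c->usage, 1);	/* issuing thread's reference */
		get_call(c);			/* extra ref held by the async call */
		put_call(c);			/* completion path drops its ref */
		put_call(c);			/* issuer drops its ref: freed here */
		return 0;
	}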
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 95d0761cdb34..155dc14caef9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -42,9 +42,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
 		if (vldb->fs_mask[i] & type_mask)
 			nr_servers++;
 
-	slist = kzalloc(sizeof(struct afs_server_list) +
-			sizeof(struct afs_server_entry) * nr_servers,
-			GFP_KERNEL);
+	slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
 	if (!slist)
 		goto error;
 
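
struct_size(slist, servers, nr_servers) from <linux/overflow.h> computes sizeof(*slist) plus nr_servers trailing array elements, saturating to SIZE_MAX on overflow so the allocation fails cleanly instead of coming back undersized. A rough userspace sketch of the same overflow-safe pattern (helper name and types are illustrative):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdlib.h>

	struct server_list {
		size_t nr_servers;
		int servers[];		/* flexible array member */
	};

	/* Allocate hdr + n * elem bytes, refusing on size_t overflow. */
	static void *alloc_flex(size_t hdr, size_t elem, size_t n)
	{
		if (elem && n > (SIZE_MAX - hdr) / elem)
			return NULL;	/* would overflow: fail the allocation */
		return calloc(1, hdr + n * elem);
	}

	int main(void)
	{
		struct server_list *slist =
			alloc_flex(sizeof(*slist), sizeof(slist->servers[0]), 16);

		if (!slist)
			return 1;
		slist->nr_servers = 16;
		free(slist);
		return 0;
	}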
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 12658c1363ae..5aa57929e8c2 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -803,7 +803,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
 	bp = xdr_encode_YFSFid(bp, &vnode->fid);
 	bp = xdr_encode_string(bp, name, namesz);
 	bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
-	bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+	bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
diff --git a/fs/aio.c b/fs/aio.c
index b906ff70c90f..aaaaf4d12c73 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 	if (unlikely(!req->ki_filp))
 		return -EBADF;
 	req->ki_complete = aio_complete_rw;
+	req->private = NULL;
 	req->ki_pos = iocb->aio_offset;
 	req->ki_flags = iocb_flags(req->ki_filp);
 	if (iocb->aio_flags & IOCB_FLAG_RESFD)
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index d441244b79df..28d9c2b1b3bb 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb,
 	pkt.len = dentry->d_name.len;
 	memcpy(pkt.name, dentry->d_name.name, pkt.len);
 	pkt.name[pkt.len] = '\0';
-	dput(dentry);
 
 	if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
 		ret = -EFAULT;
@@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb,
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
+	dput(dentry);
+
 	return ret;
 }
 
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 0e8ea2d9a2bb..078992eee299 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 	}
 	root_inode = autofs_get_inode(s, S_IFDIR | 0755);
 	root = d_make_root(root_inode);
-	if (!root)
+	if (!root) {
+		ret = -ENOMEM;
 		goto fail_ino;
+	}
 	pipe = NULL;
 
 	root->d_fsdata = ino;
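
The bug fixed here is a classic goto-cleanup pitfall: the d_make_root() failure path jumped to fail_ino without setting ret, so the function could propagate a stale (possibly zero) return value. A compact userspace sketch of the idiom, with illustrative names:

	#include <errno.h>
	#include <stdlib.h>

	/* Stand-in for d_make_root(): returns NULL on allocation failure. */
	static void *make_root(void)
	{
		return malloc(64);
	}

	static int fill_super(void)
	{
		int ret = -EINVAL;	/* left over from an earlier check */
		void *root;

		root = make_root();
		if (!root) {
			ret = -ENOMEM;	/* the assignment the fix adds */
			goto fail;
		}
		free(root);
		return 0;
	fail:
		return ret;	/* without the assignment, a stale code escapes */
	}

	int main(void)
	{
		return fill_super() ? 1 : 0;
	}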
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index d0078cbb718b..e996174cbfc0 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -14,13 +14,30 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 
+static inline bool spacetab(char c) { return c == ' ' || c == '\t'; }
+static inline char *next_non_spacetab(char *first, const char *last)
+{
+	for (; first <= last; first++)
+		if (!spacetab(*first))
+			return first;
+	return NULL;
+}
+static inline char *next_terminator(char *first, const char *last)
+{
+	for (; first <= last; first++)
+		if (spacetab(*first) || !*first)
+			return first;
+	return NULL;
+}
+
 static int load_script(struct linux_binprm *bprm)
 {
 	const char *i_arg, *i_name;
-	char *cp;
+	char *cp, *buf_end;
 	struct file *file;
 	int retval;
 
+	/* Not ours to exec if we don't start with "#!". */
 	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
 		return -ENOEXEC;
 
@@ -33,23 +50,41 @@ static int load_script(struct linux_binprm *bprm)
 	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
 		return -ENOENT;
 
-	/*
-	 * This section does the #! interpretation.
-	 * Sorta complicated, but hopefully it will work.  -TYT
-	 */
-
+	/* Release since we are not mapping a binary into memory. */
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
 
-	for (cp = bprm->buf+2;; cp++) {
-		if (cp >= bprm->buf + BINPRM_BUF_SIZE)
+	/*
+	 * This section handles parsing the #! line into separate
+	 * interpreter path and argument strings. We must be careful
+	 * because bprm->buf is not yet guaranteed to be NUL-terminated
+	 * (though the buffer will have trailing NUL padding when the
+	 * file size was smaller than the buffer size).
+	 *
+	 * We do not want to exec a truncated interpreter path, so either
+	 * we find a newline (which indicates nothing is truncated), or
+	 * we find a space/tab/NUL after the interpreter path (which
+	 * itself may be preceded by spaces/tabs). Truncating the
+	 * arguments is fine: the interpreter can re-read the script to
+	 * parse them on its own.
+	 */
+	buf_end = bprm->buf + sizeof(bprm->buf) - 1;
+	cp = strnchr(bprm->buf, sizeof(bprm->buf), '\n');
+	if (!cp) {
+		cp = next_non_spacetab(bprm->buf + 2, buf_end);
+		if (!cp)
+			return -ENOEXEC; /* Entire buf is spaces/tabs */
+		/*
+		 * If there is no later space/tab/NUL we must assume the
+		 * interpreter path is truncated.
+		 */
+		if (!next_terminator(cp, buf_end))
 			return -ENOEXEC;
-		if (!*cp || (*cp == '\n'))
-			break;
+		cp = buf_end;
 	}
+	/* NUL-terminate the buffer and any trailing spaces/tabs. */
 	*cp = '\0';
-
 	while (cp > bprm->buf) {
 		cp--;
 		if ((*cp == ' ') || (*cp == '\t'))
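
The new parser refuses to execute a script when the interpreter path may have been cut off by the fixed-size read into bprm->buf: either a newline proves the line is complete, or a space/tab/NUL after the path proves the path itself is intact. A self-contained userspace sketch of that decision logic (the buffer size and driver code are illustrative):

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	#define BUF_SIZE 128	/* stand-in for BINPRM_BUF_SIZE */

	static bool spacetab(char c) { return c == ' ' || c == '\t'; }

	static char *next_non_spacetab(char *first, const char *last)
	{
		for (; first <= last; first++)
			if (!spacetab(*first))
				return first;
		return NULL;
	}

	static char *next_terminator(char *first, const char *last)
	{
		for (; first <= last; first++)
			if (spacetab(*first) || !*first)
				return first;
		return NULL;
	}

	/* Is the "#!" line provably complete inside the buffer? */
	static bool shebang_complete(char *buf)
	{
		char *buf_end = buf + BUF_SIZE - 1;
		char *cp;

		if (buf[0] != '#' || buf[1] != '!')
			return false;
		if (memchr(buf, '\n', BUF_SIZE))
			return true;	/* newline seen: nothing was cut off */
		cp = next_non_spacetab(buf + 2, buf_end);
		if (!cp)
			return false;	/* whole buffer is spaces/tabs */
		/* No space/tab/NUL after the path: it may be truncated. */
		return next_terminator(cp, buf_end) != NULL;
	}

	int main(void)
	{
		char ok[BUF_SIZE] = "#!/bin/sh\n";
		char cut[BUF_SIZE];

		memset(cut, 'a', sizeof(cut));	/* path runs off the buffer end */
		cut[0] = '#';
		cut[1] = '!';
		printf("ok=%d cut=%d\n", shebang_complete(ok), shebang_complete(cut));
		return 0;
	}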
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c546cdce77e6..58a4c1217fa8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -104,6 +104,20 @@ void invalidate_bdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
+static void set_init_blocksize(struct block_device *bdev)
+{
+	unsigned bsize = bdev_logical_block_size(bdev);
+	loff_t size = i_size_read(bdev->bd_inode);
+
+	while (bsize < PAGE_SIZE) {
+		if (size & bsize)
+			break;
+		bsize <<= 1;
+	}
+	bdev->bd_block_size = bsize;
+	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+}
+
 int set_blocksize(struct block_device *bdev, int size)
 {
 	/* Size must be a power of two, and between 512 and PAGE_SIZE */
@@ -1431,18 +1445,9 @@ EXPORT_SYMBOL(check_disk_change);
 
 void bd_set_size(struct block_device *bdev, loff_t size)
 {
-	unsigned bsize = bdev_logical_block_size(bdev);
-
 	inode_lock(bdev->bd_inode);
 	i_size_write(bdev->bd_inode, size);
 	inode_unlock(bdev->bd_inode);
-	while (bsize < PAGE_SIZE) {
-		if (size & bsize)
-			break;
-		bsize <<= 1;
-	}
-	bdev->bd_block_size = bsize;
-	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
 }
 EXPORT_SYMBOL(bd_set_size);
 
@@ -1519,8 +1524,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			}
 		}
 
-		if (!ret)
+		if (!ret) {
 			bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+			set_init_blocksize(bdev);
+		}
 
 		/*
 		 * If the device is invalidated, rescan partition
@@ -1555,6 +1562,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			goto out_clear;
 		}
 		bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+		set_init_blocksize(bdev);
 	}
 
 	if (bdev->bd_bdi == &noop_backing_dev_info)
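
set_init_blocksize() picks the largest power-of-two block size, up to the page size, that evenly divides the device size: `size & bsize` is nonzero exactly when size is an odd multiple of bsize, i.e. when doubling again would no longer divide evenly. A runnable userspace sketch with example values:

	#include <stdio.h>

	#define PAGE_SIZE 4096u

	static unsigned init_blocksize(unsigned logical_bsize,
				       unsigned long long size)
	{
		unsigned bsize = logical_bsize;

		while (bsize < PAGE_SIZE) {
			if (size & bsize)	/* not a multiple of 2 * bsize */
				break;
			bsize <<= 1;
		}
		return bsize;
	}

	int main(void)
	{
		/* 512-byte sectors, 1 MiB device: grows to full 4 KiB blocks. */
		printf("%u\n", init_blocksize(512, 1ULL << 20));
		/* 3584-byte device (7 sectors): stuck at 512. */
		printf("%u\n", init_blocksize(512, 3584ULL));
		return 0;
	}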
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d92462fe66c8..5a6c39b44c84 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static struct extent_buffer *alloc_tree_block_no_bg_flush(
+					  struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  u64 parent_start,
+					  const struct btrfs_disk_key *disk_key,
+					  int level,
+					  u64 hint,
+					  u64 empty_size)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct extent_buffer *ret;
+
+	/*
+	 * If we are COWing a node/leaf from the extent, chunk, device or free
+	 * space trees, make sure that we do not finish block group creation of
+	 * pending block groups. We do this to avoid a deadlock.
+	 * COWing can result in allocation of a new chunk, and flushing pending
+	 * block groups (btrfs_create_pending_block_groups()) can be triggered
+	 * when finishing allocation of a new chunk. Creation of a pending block
+	 * group modifies the extent, chunk, device and free space trees,
+	 * therefore we could deadlock with ourselves since we are holding a
+	 * lock on an extent buffer that btrfs_create_pending_block_groups() may
+	 * try to COW later.
+	 * For similar reasons, we also need to delay flushing pending block
+	 * groups when splitting a leaf or node, from one of those trees, since
+	 * we are holding a write lock on it and its parent or when inserting a
+	 * new root node for one of those trees.
+	 */
+	if (root == fs_info->extent_root ||
+	    root == fs_info->chunk_root ||
+	    root == fs_info->dev_root ||
+	    root == fs_info->free_space_root)
+		trans->can_flush_pending_bgs = false;
+
+	ret = btrfs_alloc_tree_block(trans, root, parent_start,
+				     root->root_key.objectid, disk_key, level,
+				     hint, empty_size);
+	trans->can_flush_pending_bgs = true;
+
+	return ret;
+}
+
 /*
  * does the dirty work in cow of a single block.  The parent block (if
  * supplied) is updated to point to the new cow copy.  The new buffer is marked
@@ -1015,26 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
 		parent_start = parent->start;
 
-	/*
-	 * If we are COWing a node/leaf from the extent, chunk or device trees,
-	 * make sure that we do not finish block group creation of pending block
-	 * groups. We do this to avoid a deadlock.
-	 * COWing can result in allocation of a new chunk, and flushing pending
-	 * block groups (btrfs_create_pending_block_groups()) can be triggered
-	 * when finishing allocation of a new chunk. Creation of a pending block
-	 * group modifies the extent, chunk and device trees, therefore we could
-	 * deadlock with ourselves since we are holding a lock on an extent
-	 * buffer that btrfs_create_pending_block_groups() may try to COW later.
-	 */
-	if (root == fs_info->extent_root ||
-	    root == fs_info->chunk_root ||
-	    root == fs_info->dev_root)
-		trans->can_flush_pending_bgs = false;
-
-	cow = btrfs_alloc_tree_block(trans, root, parent_start,
-			root->root_key.objectid, &disk_key, level,
-			search_start, empty_size);
-	trans->can_flush_pending_bgs = true;
+	cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
+					   level, search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
@@ -3343,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	else
 		btrfs_node_key(lower, &lower_key, 0);
 
-	c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-				   &lower_key, level, root->node->start, 0);
+	c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
+					 root->node->start, 0);
 	if (IS_ERR(c))
 		return PTR_ERR(c);
 
@@ -3473,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	mid = (c_nritems + 1) / 2;
 	btrfs_node_key(c, &disk_key, mid);
 
-	split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-				       &disk_key, level, c->start, 0);
+	split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
+					     c->start, 0);
 	if (IS_ERR(split))
 		return PTR_ERR(split);
 
@@ -4258,8 +4282,8 @@ again:
 	else
 		btrfs_item_key(l, &disk_key, mid);
 
-	right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-				       &disk_key, 0, l->start, 0);
+	right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
+					     l->start, 0);
 	if (IS_ERR(right))
 		return PTR_ERR(right);
 
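
alloc_tree_block_no_bg_flush() is a guard-flag wrapper: it clears trans->can_flush_pending_bgs around the allocation so a chunk allocation triggered during COW cannot re-enter btrfs_create_pending_block_groups() while the caller holds locks on the very trees that function would modify. A stripped-down userspace sketch of the guard pattern (names illustrative):

	#include <stdbool.h>
	#include <stdio.h>

	struct trans {
		bool can_flush_pending_bgs;
	};

	/* Something the allocator may call back into. */
	static void maybe_flush_pending(struct trans *t)
	{
		if (!t->can_flush_pending_bgs)
			return;		/* suppressed inside the guarded region */
		printf("flushing pending block groups\n");
	}

	static void alloc_block_no_flush(struct trans *t)
	{
		t->can_flush_pending_bgs = false;
		maybe_flush_pending(t);	/* would deadlock in the real code path */
		t->can_flush_pending_bgs = true;
	}

	int main(void)
	{
		struct trans t = { .can_flush_pending_bgs = true };

		alloc_block_no_flush(&t);	/* prints nothing */
		maybe_flush_pending(&t);	/* prints once, flag restored */
		return 0;
	}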
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0a68cf7032f5..7a2a2621f0d9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
 struct btrfs_trans_handle;
 struct btrfs_transaction;
 struct btrfs_pending_snapshot;
+struct btrfs_delayed_ref_root;
 extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
@@ -786,6 +787,9 @@ enum {
 	 * main phase. The fs_info::balance_ctl is initialized.
 	 */
 	BTRFS_FS_BALANCE_RUNNING,
+
+	/* Indicate that the cleaner thread is awake and doing something. */
+	BTRFS_FS_CLEANER_RUNNING,
 };
 
 struct btrfs_fs_info {
@@ -2661,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   unsigned long count);
 int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
 				 unsigned long count, u64 transid, int wait);
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+				  struct btrfs_delayed_ref_root *delayed_refs,
+				  struct btrfs_delayed_ref_head *head);
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8da2f380d3c0..6a2a2a951705 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg)
 	while (1) {
 		again = 0;
 
+		set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
+
 		/* Make the cleaner go to sleep early. */
 		if (btrfs_need_cleaner_sleep(fs_info))
 			goto sleep;
@@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg)
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 sleep:
+		clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
 		if (kthread_should_park())
 			kthread_parkme();
 		if (kthread_should_stop())
@@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
 		spin_lock(&fs_info->ordered_root_lock);
 	}
 	spin_unlock(&fs_info->ordered_root_lock);
+
+	/*
+	 * We need this here because if we've been flipped read-only we won't
+	 * get sync() from the umount, so we need to make sure any ordered
+	 * extents that haven't had their dirty pages IO start writeout yet
+	 * actually get run and error out properly.
+	 */
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 }
 
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 		if (pin_bytes)
 			btrfs_pin_extent(fs_info, head->bytenr,
 					 head->num_bytes, 1);
+		btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
 		btrfs_put_delayed_ref_head(head);
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b15afeae16df..d81035b7ea7d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
 	return ret ? ret : 1;
 }
 
-static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans,
-					struct btrfs_delayed_ref_head *head)
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+				  struct btrfs_delayed_ref_root *delayed_refs,
+				  struct btrfs_delayed_ref_head *head)
 {
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_delayed_ref_root *delayed_refs =
-		&trans->transaction->delayed_refs;
 	int nr_items = 1;	/* Dropping this ref head update. */
 
 	if (head->total_ref_mod < 0) {
@@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	cleanup_ref_head_accounting(trans, head);
+	btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
 
 	trace_run_delayed_ref_head(fs_info, head, 0);
 	btrfs_delayed_ref_unlock(head);
@@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		ret = 0;
 		break;
 	case COMMIT_TRANS:
+		/*
+		 * If we have pending delayed iputs then we could free up a
+		 * bunch of pinned space, so make sure we run the iputs before
+		 * we do our pinned bytes check below.
+		 */
+		mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
+		btrfs_run_delayed_iputs(fs_info);
+		mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+
 		ret = may_commit_transaction(fs_info, space_info);
 		break;
 	default:
@@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	if (head->must_insert_reserved)
 		ret = 1;
 
-	cleanup_ref_head_accounting(trans, head);
+	btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
 	mutex_unlock(&head->mutex);
 	btrfs_put_delayed_ref_head(head);
 	return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 43eb4535319d..5c349667c761 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3129,9 +3129,6 @@ out:
 	/* once for the tree */
 	btrfs_put_ordered_extent(ordered_extent);
 
-	/* Try to release some metadata so we don't get an OOM but don't wait */
-	btrfs_btree_balance_dirty_nodelay(fs_info);
-
 	return ret;
 }
 
@@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
 	ASSERT(list_empty(&binode->delayed_iput));
 	list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
 	spin_unlock(&fs_info->delayed_iput_lock);
+	if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
+		wake_up_process(fs_info->cleaner_kthread);
 }
 
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fab9443f6a42..9c8e1734429c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
 	inode_lock_nested(inode2, I_MUTEX_CHILD);
 }
 
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+				       struct inode *inode2, u64 loff2, u64 len)
+{
+	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+				     struct inode *inode2, u64 loff2, u64 len)
+{
+	if (inode1 < inode2) {
+		swap(inode1, inode2);
+		swap(loff1, loff2);
+	} else if (inode1 == inode2 && loff2 < loff1) {
+		swap(loff1, loff2);
+	}
+	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
 static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
 				   struct inode *dst, u64 dst_loff)
 {
@@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
 		return -EINVAL;
 
 	/*
-	 * Lock destination range to serialize with concurrent readpages().
+	 * Lock destination range to serialize with concurrent readpages() and
+	 * source range to serialize with relocation.
 	 */
-	lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
 	ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
-	unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
 
 	return ret;
 }
@@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 		len = ALIGN(src->i_size, bs) - off;
 
 	if (destoff > inode->i_size) {
+		const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
+
 		ret = btrfs_cont_expand(inode, inode->i_size, destoff);
 		if (ret)
 			return ret;
+		/*
+		 * We may have truncated the last block if the inode's size is
+		 * not sector size aligned, so we need to wait for writeback to
+		 * complete before proceeding further, otherwise we can race
+		 * with cloning and attempt to increment a reference to an
+		 * extent that no longer exists (writeback completed right after
+		 * we found the previous extent covering eof and before we
+		 * attempted to increment its reference count).
+		 */
+		ret = btrfs_wait_ordered_range(inode, wb_start,
+					       destoff - wb_start);
+		if (ret)
+			return ret;
 	}
 
 	/*
-	 * Lock destination range to serialize with concurrent readpages().
+	 * Lock destination range to serialize with concurrent readpages() and
+	 * source range to serialize with relocation.
 	 */
-	lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+	btrfs_double_extent_lock(src, off, inode, destoff, len);
 	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
-	unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+	btrfs_double_extent_unlock(src, off, inode, destoff, len);
 	/*
 	 * Truncate page cache pages so that future reads will see the cloned
 	 * data immediately and not the previous data.
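
btrfs_double_extent_lock() prevents ABBA deadlocks by normalizing lock order: the two ranges are always taken in a fixed order (by inode address, and by offset when both ranges are in the same inode), so two tasks locking the same pair with swapped arguments cannot each hold one lock while waiting for the other. A userspace sketch of the ordering trick using whole-object pthread mutexes instead of extent ranges:

	#include <pthread.h>
	#include <stdint.h>

	struct obj {
		pthread_mutex_t lock;
	};

	/* Always lock the lower-addressed object first. */
	static void double_lock(struct obj *a, struct obj *b)
	{
		if ((uintptr_t)a > (uintptr_t)b) {
			struct obj *t = a;
			a = b;
			b = t;
		}
		pthread_mutex_lock(&a->lock);
		if (a != b)
			pthread_mutex_lock(&b->lock);
	}

	static void double_unlock(struct obj *a, struct obj *b)
	{
		pthread_mutex_unlock(&a->lock);
		if (a != b)
			pthread_mutex_unlock(&b->lock);
	}

	int main(void)
	{
		struct obj x = { PTHREAD_MUTEX_INITIALIZER };
		struct obj y = { PTHREAD_MUTEX_INITIALIZER };

		double_lock(&x, &y);	/* takes locks in the same order as */
		double_unlock(&x, &y);	/* double_lock(&y, &x) would */
		return 0;
	}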
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c5586ffd1426..0a3f122dd61f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 					flags | SB_RDONLY, device_name, data);
 		if (IS_ERR(mnt_root)) {
 			root = ERR_CAST(mnt_root);
+			kfree(subvol_name);
 			goto out;
 		}
 
@@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 			if (error < 0) {
 				root = ERR_PTR(error);
 				mntput(mnt_root);
+				kfree(subvol_name);
 				goto out;
 			}
 		}
 	}
 	if (IS_ERR(mnt_root)) {
 		root = ERR_CAST(mnt_root);
+		kfree(subvol_name);
 		goto out;
 	}
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 127fa1535f58..4ec2b660d014 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	btrfs_trans_release_chunk_metadata(trans);
 
-	if (lock && should_end_transaction(trans) &&
-	    READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
-		spin_lock(&info->trans_lock);
-		if (cur_trans->state == TRANS_STATE_RUNNING)
-			cur_trans->state = TRANS_STATE_BLOCKED;
-		spin_unlock(&info->trans_lock);
-	}
-
 	if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
 		if (throttle)
 			return btrfs_commit_transaction(trans);
@@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 }
 
+/*
+ * Release reserved delayed ref space of all pending block groups of the
+ * transaction and remove them from the list
+ */
+static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_block_group_cache *block_group, *tmp;
+
+	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
+		btrfs_delayed_refs_rsv_release(fs_info, 1);
+		list_del_init(&block_group->bg_list);
+	}
+}
+
 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
 	/*
@@ -2270,6 +2277,7 @@ scrub_continue:
 	btrfs_scrub_continue(fs_info);
 cleanup_transaction:
 	btrfs_trans_release_metadata(trans);
+	btrfs_cleanup_pending_block_groups(trans);
 	btrfs_trans_release_chunk_metadata(trans);
 	trans->block_rsv = NULL;
 	btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2576b1a379c9..15561926ab32 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 	else
 		fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
 
-	fs_devices->fsid_change = fsid_change_in_progress;
-
 	if (IS_ERR(fs_devices))
 		return ERR_CAST(fs_devices);
 
+	fs_devices->fsid_change = fsid_change_in_progress;
+
 	mutex_lock(&fs_devices->device_list_mutex);
 	list_add(&fs_devices->fs_list, &fs_uuids);
 
@@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 		ret = -EUCLEAN;
 		goto out;
 	}
+
+	/* It's possible this device is a dummy for seed device */
+	if (dev->disk_total_bytes == 0) {
+		dev = find_device(fs_info->fs_devices->seed, devid, NULL);
+		if (!dev) {
+			btrfs_err(fs_info, "failed to find seed devid %llu",
+				  devid);
+			ret = -EUCLEAN;
+			goto out;
+		}
+	}
+
 	if (physical_offset + physical_len > dev->disk_total_bytes) {
 		btrfs_err(fs_info,
 "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
diff --git a/fs/buffer.c b/fs/buffer.c
index 52d024bfdbc1..48318fb74938 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	struct buffer_head *head;
 	struct page *page;
 	int all_mapped = 1;
+	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
 
 	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
 	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
@@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	 * file io on the block device and getblk.  It gets dealt with
 	 * elsewhere, don't buffer_error if we had some unmapped buffers
 	 */
-	if (all_mapped) {
-		printk("__find_get_block_slow() failed. "
-			"block=%llu, b_blocknr=%llu\n",
-			(unsigned long long)block,
-			(unsigned long long)bh->b_blocknr);
-		printk("b_state=0x%08lx, b_size=%zu\n",
-			bh->b_state, bh->b_size);
-		printk("device %pg blocksize: %d\n", bdev,
-			1 << bd_inode->i_blkbits);
+	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
+	if (all_mapped && __ratelimit(&last_warned)) {
+		printk("__find_get_block_slow() failed. block=%llu, "
+		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
+		       "device %pg blocksize: %d\n",
+		       (unsigned long long)block,
+		       (unsigned long long)bh->b_blocknr,
+		       bh->b_state, bh->b_size, bdev,
+		       1 << bd_inode->i_blkbits);
 	}
 out_unlock:
 	spin_unlock(&bd_mapping->private_lock);
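
DEFINE_RATELIMIT_STATE(last_warned, HZ, 1) allows at most one of these warnings per second, and RATELIMIT_MSG_ON_RELEASE suppresses the periodic "callbacks suppressed" lines that __ratelimit() would otherwise print. A userspace sketch of the windowed rate limit (simplified: suppressed messages are silently dropped and not counted):

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	struct ratelimit {
		time_t begin;
		int interval;	/* window length in seconds */
		int burst;	/* messages allowed per window */
		int printed;
	};

	static bool ratelimit_ok(struct ratelimit *rs)
	{
		time_t now = time(NULL);

		if (now - rs->begin >= rs->interval) {	/* new window */
			rs->begin = now;
			rs->printed = 0;
		}
		if (rs->printed >= rs->burst)
			return false;	/* drop this message */
		rs->printed++;
		return true;
	}

	int main(void)
	{
		struct ratelimit rs = { .interval = 1, .burst = 1 };

		for (int i = 0; i < 5; i++)
			if (ratelimit_ok(&rs))
				printf("warning %d\n", i);	/* prints once */
		return 0;
	}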
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5d0c05e288cc..a47c541f8006 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1494,10 +1494,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
 		if (err < 0 || off >= i_size_read(inode)) {
 			unlock_page(page);
 			put_page(page);
-			if (err == -ENOMEM)
-				ret = VM_FAULT_OOM;
-			else
-				ret = VM_FAULT_SIGBUS;
+			ret = vmf_error(err);
 			goto out_inline;
 		}
 		if (err < PAGE_SIZE)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 94c026bba2c2..bba28a5034ba 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1035,6 +1035,8 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci)
 	list_del_init(&ci->i_snap_realm_item);
 	ci->i_snap_realm_counter++;
 	ci->i_snap_realm = NULL;
+	if (realm->ino == ci->i_vino.ino)
+		realm->inode = NULL;
 	spin_unlock(&realm->inodes_with_caps_lock);
 	ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
 			    realm);
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 03f4d24db8fe..9455d3aef0c3 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -3,19 +3,6 @@
  * quota.c - CephFS quota
  *
  * Copyright (C) 2017-2018 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/statfs.h>
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 041c27ea8de1..f74193da0e09 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -616,7 +616,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 		      capsnap->size);
 
 	spin_lock(&mdsc->snap_flush_lock);
-	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+	if (list_empty(&ci->i_snap_flush_item))
+		list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
 	spin_unlock(&mdsc->snap_flush_lock);
 	return 1;  /* caller may want to ceph_flush_snaps */
 }
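
The guard makes requeueing idempotent: in the kernel's circular lists an unlinked node points at itself, so list_empty() on the item detects whether it is already on snap_flush_list and skips the second list_add_tail(), which would otherwise corrupt the list. A self-contained sketch of the guard:

	#include <stdbool.h>
	#include <stdio.h>

	struct list_head { struct list_head *prev, *next; };

	static void list_init(struct list_head *h) { h->prev = h->next = h; }
	static bool list_empty(const struct list_head *h) { return h->next == h; }

	static void list_add_tail(struct list_head *n, struct list_head *h)
	{
		n->prev = h->prev;
		n->next = h;
		h->prev->next = n;
		h->prev = n;
	}

	int main(void)
	{
		struct list_head head, item;

		list_init(&head);
		list_init(&item);	/* an unlinked node is self-linked */

		if (list_empty(&item))	/* the guard added by the fix */
			list_add_tail(&item, &head);
		if (list_empty(&item))	/* second attempt is now a no-op */
			list_add_tail(&item, &head);

		printf("item linked once: %s\n",
		       head.next == &item && head.prev == &item ? "yes" : "no");
		return 0;
	}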
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e9a7cc488da..da2cd8e89062 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -530,7 +530,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 	seq_putc(m, ',');
 	pos = m->count;
 
-	ret = ceph_print_client_options(m, fsc->client);
+	ret = ceph_print_client_options(m, fsc->client, false);
 	if (ret)
 		return ret;
 
@@ -640,7 +640,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	opt = NULL; /* fsc->client now owns this */
 
 	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
-	fsc->client->osdc.abort_on_full = true;
+	ceph_set_opt(fsc->client, ABORT_ON_FULL);
 
 	if (!fsopt->mds_namespace) {
 		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 593fb422d0f3..e92a2fee3c57 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -252,6 +252,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, ",ACL");
 #endif
 	seq_putc(m, '\n');
+	seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
 	seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
 	seq_printf(m, "Servers:");
 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 26776eddd85d..7652551a1fc4 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.15"
+#define CIFS_VERSION   "2.17"
 #endif	/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 01ded7038b19..94dbdbe5be34 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1438,6 +1438,7 @@ struct mid_q_entry {
 	int mid_state;	/* wish this were enum but can not pass to wait_event */
 	unsigned int mid_flags;
 	__le16 command;		/* smb command code */
+	unsigned int optype;	/* operation type */
 	bool large_buf:1;	/* if valid response, is pointer to large buf */
 	bool multiRsp:1;	/* multiple trans2 responses for one request */
 	bool multiEnd:1;	/* both received */
@@ -1574,6 +1575,25 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
 	kfree(param);
 }
 
+static inline bool is_interrupt_error(int error)
+{
+	switch (error) {
+	case -EINTR:
+	case -ERESTARTSYS:
+	case -ERESTARTNOHAND:
+	case -ERESTARTNOINTR:
+		return true;
+	}
+	return false;
+}
+
+static inline bool is_retryable_error(int error)
+{
+	if (is_interrupt_error(error) || error == -EAGAIN)
+		return true;
+	return false;
+}
+
 #define   MID_FREE 0
 #define   MID_REQUEST_ALLOCATED 1
 #define   MID_REQUEST_SUBMITTED 2
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b1f49c1c543a..bb54ccf8481c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,24 +128,31 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
128 int rc; 128 int rc;
129 struct dfs_cache_tgt_list tl; 129 struct dfs_cache_tgt_list tl;
130 struct dfs_cache_tgt_iterator *it = NULL; 130 struct dfs_cache_tgt_iterator *it = NULL;
131 char tree[MAX_TREE_SIZE + 1]; 131 char *tree;
132 const char *tcp_host; 132 const char *tcp_host;
133 size_t tcp_host_len; 133 size_t tcp_host_len;
134 const char *dfs_host; 134 const char *dfs_host;
135 size_t dfs_host_len; 135 size_t dfs_host_len;
136 136
137 tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
138 if (!tree)
139 return -ENOMEM;
140
137 if (tcon->ipc) { 141 if (tcon->ipc) {
138 snprintf(tree, sizeof(tree), "\\\\%s\\IPC$", 142 snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
139 tcon->ses->server->hostname); 143 tcon->ses->server->hostname);
140 return CIFSTCon(0, tcon->ses, tree, tcon, nlsc); 144 rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
145 goto out;
141 } 146 }
142 147
143 if (!tcon->dfs_path) 148 if (!tcon->dfs_path) {
144 return CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc); 149 rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
150 goto out;
151 }
145 152
146 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl); 153 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
147 if (rc) 154 if (rc)
148 return rc; 155 goto out;
149 156
150 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host, 157 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
151 &tcp_host_len); 158 &tcp_host_len);
@@ -165,7 +172,7 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
165 continue; 172 continue;
166 } 173 }
167 174
168 snprintf(tree, sizeof(tree), "\\%s", tgt); 175 snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
169 176
170 rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc); 177 rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
171 if (!rc) 178 if (!rc)
@@ -182,6 +189,8 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
182 rc = -ENOENT; 189 rc = -ENOENT;
183 } 190 }
184 dfs_cache_free_tgts(&tl); 191 dfs_cache_free_tgts(&tl);
192out:
193 kfree(tree);
185 return rc; 194 return rc;
186} 195}
187#else 196#else
@@ -1540,18 +1549,26 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server)
1540} 1549}
1541 1550
1542static int 1551static int
1543cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1552__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
1553 bool malformed)
1544{ 1554{
1545 int length; 1555 int length;
1546 struct cifs_readdata *rdata = mid->callback_data;
1547 1556
1548 length = cifs_discard_remaining_data(server); 1557 length = cifs_discard_remaining_data(server);
1549 dequeue_mid(mid, rdata->result); 1558 dequeue_mid(mid, malformed);
1550 mid->resp_buf = server->smallbuf; 1559 mid->resp_buf = server->smallbuf;
1551 server->smallbuf = NULL; 1560 server->smallbuf = NULL;
1552 return length; 1561 return length;
1553} 1562}
1554 1563
1564static int
1565cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1566{
1567 struct cifs_readdata *rdata = mid->callback_data;
1568
1569 return __cifs_readv_discard(server, mid, rdata->result);
1570}
1571
1555int 1572int
1556cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1573cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1557{ 1574{
@@ -1593,12 +1610,23 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1593 return -1; 1610 return -1;
1594 } 1611 }
1595 1612
1613 /* set up first two iov for signature check and to get credits */
1614 rdata->iov[0].iov_base = buf;
1615 rdata->iov[0].iov_len = 4;
1616 rdata->iov[1].iov_base = buf + 4;
1617 rdata->iov[1].iov_len = server->total_read - 4;
1618 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
1619 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1620 cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
1621 rdata->iov[1].iov_base, rdata->iov[1].iov_len);
1622
1596 /* Was the SMB read successful? */ 1623 /* Was the SMB read successful? */
1597 rdata->result = server->ops->map_error(buf, false); 1624 rdata->result = server->ops->map_error(buf, false);
1598 if (rdata->result != 0) { 1625 if (rdata->result != 0) {
1599 cifs_dbg(FYI, "%s: server returned error %d\n", 1626 cifs_dbg(FYI, "%s: server returned error %d\n",
1600 __func__, rdata->result); 1627 __func__, rdata->result);
1601 return cifs_readv_discard(server, mid); 1628 /* normal error on read response */
1629 return __cifs_readv_discard(server, mid, false);
1602 } 1630 }
1603 1631
1604 /* Is there enough to get to the rest of the READ_RSP header? */ 1632 /* Is there enough to get to the rest of the READ_RSP header? */
@@ -1642,14 +1670,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1642 server->total_read += length; 1670 server->total_read += length;
1643 } 1671 }
1644 1672
1645 /* set up first iov for signature check */
1646 rdata->iov[0].iov_base = buf;
1647 rdata->iov[0].iov_len = 4;
1648 rdata->iov[1].iov_base = buf + 4;
1649 rdata->iov[1].iov_len = server->total_read - 4;
1650 cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n",
1651 rdata->iov[0].iov_base, server->total_read);
1652
1653 /* how much data is in the response? */ 1673 /* how much data is in the response? */
1654#ifdef CONFIG_CIFS_SMB_DIRECT 1674#ifdef CONFIG_CIFS_SMB_DIRECT
1655 use_rdma_mr = rdata->mr; 1675 use_rdma_mr = rdata->mr;
@@ -2114,7 +2134,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
2114 2134
2115 for (j = 0; j < nr_pages; j++) { 2135 for (j = 0; j < nr_pages; j++) {
2116 unlock_page(wdata2->pages[j]); 2136 unlock_page(wdata2->pages[j]);
2117 if (rc != 0 && rc != -EAGAIN) { 2137 if (rc != 0 && !is_retryable_error(rc)) {
2118 SetPageError(wdata2->pages[j]); 2138 SetPageError(wdata2->pages[j]);
2119 end_page_writeback(wdata2->pages[j]); 2139 end_page_writeback(wdata2->pages[j]);
2120 put_page(wdata2->pages[j]); 2140 put_page(wdata2->pages[j]);
@@ -2123,7 +2143,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
2123 2143
2124 if (rc) { 2144 if (rc) {
2125 kref_put(&wdata2->refcount, cifs_writedata_release); 2145 kref_put(&wdata2->refcount, cifs_writedata_release);
2126 if (rc == -EAGAIN) 2146 if (is_retryable_error(rc))
2127 continue; 2147 continue;
2128 break; 2148 break;
2129 } 2149 }
@@ -2132,7 +2152,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
2132 i += nr_pages; 2152 i += nr_pages;
2133 } while (i < wdata->nr_pages); 2153 } while (i < wdata->nr_pages);
2134 2154
2135 mapping_set_error(inode->i_mapping, rc); 2155 if (rc != 0 && !is_retryable_error(rc))
2156 mapping_set_error(inode->i_mapping, rc);
2136 kref_put(&wdata->refcount, cifs_writedata_release); 2157 kref_put(&wdata->refcount, cifs_writedata_release);
2137} 2158}
2138 2159
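
The cifssmb.c hunks above replace bare rc == -EAGAIN tests with is_retryable_error(), so that writes interrupted by a signal are requeued or redirtied instead of being reported as hard page errors, and mapping_set_error() is skipped for errors that will be retried. A minimal compilable sketch of such predicates follows; the restart codes are kernel-internal (include/linux/errno.h) and are redefined here only so the sketch builds in userspace, and the exact set of codes the cifs helpers accept is an assumption, not the committed definition:

#include <stdbool.h>

/* Kernel-internal restart codes; redefined so this sketch compiles. */
#define ERESTARTSYS     512
#define ERESTARTNOINTR  513
#define ERESTARTNOHAND  514

/* Error came from a pending signal: the operation should be restarted. */
static bool is_interrupt_error(int error)
{
        switch (error) {
        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTARTNOHAND:
                return true;
        }
        return false;
}

/* Worth retrying (requeue/redirty) rather than failing the mapping. */
static bool is_retryable_error(int error)
{
        return is_interrupt_error(error) || error == -EAGAIN;
}
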
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f66529679ca2..8463c940e0e5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -433,9 +433,10 @@ static void reconn_inval_dfs_target(struct TCP_Server_Info *server,
433 kfree(server->hostname); 433 kfree(server->hostname);
434 434
435 server->hostname = extract_hostname(name); 435 server->hostname = extract_hostname(name);
436 if (!server->hostname) { 436 if (IS_ERR(server->hostname)) {
437 cifs_dbg(FYI, "%s: failed to extract hostname from target: %d\n", 437 cifs_dbg(FYI,
438 __func__, -ENOMEM); 438 "%s: failed to extract hostname from target: %ld\n",
439 __func__, PTR_ERR(server->hostname));
439 } 440 }
440} 441}
441 442
@@ -719,6 +720,21 @@ server_unresponsive(struct TCP_Server_Info *server)
719 return false; 720 return false;
720} 721}
721 722
723static inline bool
724zero_credits(struct TCP_Server_Info *server)
725{
726 int val;
727
728 spin_lock(&server->req_lock);
729 val = server->credits + server->echo_credits + server->oplock_credits;
730 if (server->in_flight == 0 && val == 0) {
731 spin_unlock(&server->req_lock);
732 return true;
733 }
734 spin_unlock(&server->req_lock);
735 return false;
736}
737
722static int 738static int
723cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) 739cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
724{ 740{
@@ -731,6 +747,12 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
731 for (total_read = 0; msg_data_left(smb_msg); total_read += length) { 747 for (total_read = 0; msg_data_left(smb_msg); total_read += length) {
732 try_to_freeze(); 748 try_to_freeze();
733 749
750 /* reconnect if no credits and no requests in flight */
751 if (zero_credits(server)) {
752 cifs_reconnect(server);
753 return -ECONNABORTED;
754 }
755
734 if (server_unresponsive(server)) 756 if (server_unresponsive(server))
735 return -ECONNABORTED; 757 return -ECONNABORTED;
736 if (cifs_rdma_enabled(server) && server->smbd_conn) 758 if (cifs_rdma_enabled(server) && server->smbd_conn)
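
The zero_credits() check added above closes a wedge: if every credit pool is empty and nothing is in flight, no response can ever arrive to grant new credits, so the demultiplex thread forces a reconnect instead of waiting forever. The invariant, restated as a userspace sketch with a pthread mutex standing in for req_lock (field names mirror the hunk; the rest is illustrative):

#include <pthread.h>
#include <stdbool.h>

struct srv_credits {
        pthread_mutex_t lock;
        int credits;        /* regular request credits        */
        int echo_credits;   /* reserved for SMB echo          */
        int oplock_credits; /* reserved for oplock breaks     */
        int in_flight;      /* requests awaiting a response   */
};

/* If every pool is empty and nothing is in flight, no response can ever
 * arrive to grant more credits; only a reconnect can recover. */
static bool zero_credits(struct srv_credits *s)
{
        bool wedged;

        pthread_mutex_lock(&s->lock);
        wedged = s->in_flight == 0 &&
                 s->credits + s->echo_credits + s->oplock_credits == 0;
        pthread_mutex_unlock(&s->lock);
        return wedged;
}
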
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index cd63c4a70875..09b7d0d4f6e4 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -776,6 +776,7 @@ static int get_tgt_list(const struct dfs_cache_entry *ce,
776 it->it_name = kstrndup(t->t_name, strlen(t->t_name), 776 it->it_name = kstrndup(t->t_name, strlen(t->t_name),
777 GFP_KERNEL); 777 GFP_KERNEL);
778 if (!it->it_name) { 778 if (!it->it_name) {
779 kfree(it);
779 rc = -ENOMEM; 780 rc = -ENOMEM;
780 goto err_free_it; 781 goto err_free_it;
781 } 782 }
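
The added kfree(it) plugs a leak on the error path: when duplicating the target name fails, the freshly allocated iterator node itself must be freed before jumping to the cleanup label, which only walks nodes already linked into the list. The same two-allocation pattern in a standalone sketch (the names are illustrative, not the cifs structures):

#include <stdlib.h>
#include <string.h>

struct tgt_it {
        char *it_name;
        /* list linkage elided */
};

/* Returns NULL on failure; never leaks the partially built node. */
static struct tgt_it *tgt_it_new(const char *name)
{
        struct tgt_it *it = calloc(1, sizeof(*it));

        if (!it)
                return NULL;
        it->it_name = strdup(name);
        if (!it->it_name) {
                free(it);       /* the fix: free the node, not just fail */
                return NULL;
        }
        return it;
}
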
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e3e3a7550205..659ce1b92c44 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -733,7 +733,8 @@ reopen_success:
733 733
734 if (can_flush) { 734 if (can_flush) {
735 rc = filemap_write_and_wait(inode->i_mapping); 735 rc = filemap_write_and_wait(inode->i_mapping);
736 mapping_set_error(inode->i_mapping, rc); 736 if (!is_interrupt_error(rc))
737 mapping_set_error(inode->i_mapping, rc);
737 738
738 if (tcon->unix_ext) 739 if (tcon->unix_ext)
739 rc = cifs_get_inode_info_unix(&inode, full_path, 740 rc = cifs_get_inode_info_unix(&inode, full_path,
@@ -1132,14 +1133,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1132 1133
1133 /* 1134 /*
1134 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1135 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1135 * and check it for zero before using. 1136 * and check it before using.
1136 */ 1137 */
1137 max_buf = tcon->ses->server->maxBuf; 1138 max_buf = tcon->ses->server->maxBuf;
1138 if (!max_buf) { 1139 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1139 free_xid(xid); 1140 free_xid(xid);
1140 return -EINVAL; 1141 return -EINVAL;
1141 } 1142 }
1142 1143
1144 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1145 PAGE_SIZE);
1146 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1147 PAGE_SIZE);
1143 max_num = (max_buf - sizeof(struct smb_hdr)) / 1148 max_num = (max_buf - sizeof(struct smb_hdr)) /
1144 sizeof(LOCKING_ANDX_RANGE); 1149 sizeof(LOCKING_ANDX_RANGE);
1145 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1150 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1472,12 +1477,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1472 1477
1473 /* 1478 /*
1474 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1479 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1475 * and check it for zero before using. 1480 * and check it before using.
1476 */ 1481 */
1477 max_buf = tcon->ses->server->maxBuf; 1482 max_buf = tcon->ses->server->maxBuf;
1478 if (!max_buf) 1483 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1479 return -EINVAL; 1484 return -EINVAL;
1480 1485
1486 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1487 PAGE_SIZE);
1488 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1489 PAGE_SIZE);
1481 max_num = (max_buf - sizeof(struct smb_hdr)) / 1490 max_num = (max_buf - sizeof(struct smb_hdr)) /
1482 sizeof(LOCKING_ANDX_RANGE); 1491 sizeof(LOCKING_ANDX_RANGE);
1483 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1492 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -2110,6 +2119,7 @@ static int cifs_writepages(struct address_space *mapping,
2110 pgoff_t end, index; 2119 pgoff_t end, index;
2111 struct cifs_writedata *wdata; 2120 struct cifs_writedata *wdata;
2112 int rc = 0; 2121 int rc = 0;
2122 int saved_rc = 0;
2113 unsigned int xid; 2123 unsigned int xid;
2114 2124
2115 /* 2125 /*
@@ -2138,8 +2148,10 @@ retry:
2138 2148
2139 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, 2149 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2140 &wsize, &credits); 2150 &wsize, &credits);
2141 if (rc) 2151 if (rc != 0) {
2152 done = true;
2142 break; 2153 break;
2154 }
2143 2155
2144 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1; 2156 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2145 2157
@@ -2147,6 +2159,7 @@ retry:
2147 &found_pages); 2159 &found_pages);
2148 if (!wdata) { 2160 if (!wdata) {
2149 rc = -ENOMEM; 2161 rc = -ENOMEM;
2162 done = true;
2150 add_credits_and_wake_if(server, credits, 0); 2163 add_credits_and_wake_if(server, credits, 0);
2151 break; 2164 break;
2152 } 2165 }
@@ -2175,7 +2188,7 @@ retry:
2175 if (rc != 0) { 2188 if (rc != 0) {
2176 add_credits_and_wake_if(server, wdata->credits, 0); 2189 add_credits_and_wake_if(server, wdata->credits, 0);
2177 for (i = 0; i < nr_pages; ++i) { 2190 for (i = 0; i < nr_pages; ++i) {
2178 if (rc == -EAGAIN) 2191 if (is_retryable_error(rc))
2179 redirty_page_for_writepage(wbc, 2192 redirty_page_for_writepage(wbc,
2180 wdata->pages[i]); 2193 wdata->pages[i]);
2181 else 2194 else
@@ -2183,7 +2196,7 @@ retry:
2183 end_page_writeback(wdata->pages[i]); 2196 end_page_writeback(wdata->pages[i]);
2184 put_page(wdata->pages[i]); 2197 put_page(wdata->pages[i]);
2185 } 2198 }
2186 if (rc != -EAGAIN) 2199 if (!is_retryable_error(rc))
2187 mapping_set_error(mapping, rc); 2200 mapping_set_error(mapping, rc);
2188 } 2201 }
2189 kref_put(&wdata->refcount, cifs_writedata_release); 2202 kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2193,6 +2206,15 @@ retry:
2193 continue; 2206 continue;
2194 } 2207 }
2195 2208
2209 /* Return immediately if we received a signal during writing */
2210 if (is_interrupt_error(rc)) {
2211 done = true;
2212 break;
2213 }
2214
2215 if (rc != 0 && saved_rc == 0)
2216 saved_rc = rc;
2217
2196 wbc->nr_to_write -= nr_pages; 2218 wbc->nr_to_write -= nr_pages;
2197 if (wbc->nr_to_write <= 0) 2219 if (wbc->nr_to_write <= 0)
2198 done = true; 2220 done = true;
@@ -2210,6 +2232,9 @@ retry:
2210 goto retry; 2232 goto retry;
2211 } 2233 }
2212 2234
2235 if (saved_rc != 0)
2236 rc = saved_rc;
2237
2213 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 2238 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2214 mapping->writeback_index = index; 2239 mapping->writeback_index = index;
2215 2240
@@ -2242,8 +2267,8 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2242 set_page_writeback(page); 2267 set_page_writeback(page);
2243retry_write: 2268retry_write:
2244 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 2269 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2245 if (rc == -EAGAIN) { 2270 if (is_retryable_error(rc)) {
2246 if (wbc->sync_mode == WB_SYNC_ALL) 2271 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2247 goto retry_write; 2272 goto retry_write;
2248 redirty_page_for_writepage(wbc, page); 2273 redirty_page_for_writepage(wbc, page);
2249 } else if (rc != 0) { 2274 } else if (rc != 0) {
@@ -2671,6 +2696,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2671 2696
2672 rc = cifs_write_allocate_pages(wdata->pages, nr_pages); 2697 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2673 if (rc) { 2698 if (rc) {
2699 kvfree(wdata->pages);
2674 kfree(wdata); 2700 kfree(wdata);
2675 add_credits_and_wake_if(server, credits, 0); 2701 add_credits_and_wake_if(server, credits, 0);
2676 break; 2702 break;
@@ -2682,6 +2708,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2682 if (rc) { 2708 if (rc) {
2683 for (i = 0; i < nr_pages; i++) 2709 for (i = 0; i < nr_pages; i++)
2684 put_page(wdata->pages[i]); 2710 put_page(wdata->pages[i]);
2711 kvfree(wdata->pages);
2685 kfree(wdata); 2712 kfree(wdata);
2686 add_credits_and_wake_if(server, credits, 0); 2713 add_credits_and_wake_if(server, credits, 0);
2687 break; 2714 break;
@@ -3361,8 +3388,12 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3361 } 3388 }
3362 3389
3363 rc = cifs_read_allocate_pages(rdata, npages); 3390 rc = cifs_read_allocate_pages(rdata, npages);
3364 if (rc) 3391 if (rc) {
3365 goto error; 3392 kvfree(rdata->pages);
3393 kfree(rdata);
3394 add_credits_and_wake_if(server, credits, 0);
3395 break;
3396 }
3366 3397
3367 rdata->tailsz = PAGE_SIZE; 3398 rdata->tailsz = PAGE_SIZE;
3368 } 3399 }
@@ -3382,7 +3413,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3382 if (!rdata->cfile->invalidHandle || 3413 if (!rdata->cfile->invalidHandle ||
3383 !(rc = cifs_reopen_file(rdata->cfile, true))) 3414 !(rc = cifs_reopen_file(rdata->cfile, true)))
3384 rc = server->ops->async_readv(rdata); 3415 rc = server->ops->async_readv(rdata);
3385error:
3386 if (rc) { 3416 if (rc) {
3387 add_credits_and_wake_if(server, rdata->credits, 0); 3417 add_credits_and_wake_if(server, rdata->credits, 0);
3388 kref_put(&rdata->refcount, 3418 kref_put(&rdata->refcount,
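
Both mandatory-lock paths in file.c above now reject a maxBuf too small to hold a header plus one lock element and clamp the working size to PAGE_SIZE before sizing the kcalloc(), so a hostile or buggy server cannot induce an oversized allocation. A compilable sketch of the bounding idea; HDR_SIZE and ELEM_SIZE are stand-in values, not the real struct sizes, and the arithmetic is simplified relative to the committed hunk:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE       4096u
#define HDR_SIZE        32u     /* stand-in for sizeof(struct smb_hdr)      */
#define ELEM_SIZE       24u     /* stand-in for sizeof(LOCKING_ANDX_RANGE)  */

/* How many lock elements fit in one bounded buffer, or 0 if the
 * server-advertised max_buf is unusably small (maps to -EINVAL). */
static unsigned int lock_elems_per_buf(unsigned int max_buf)
{
        if (max_buf < HDR_SIZE + ELEM_SIZE)
                return 0;
        /* never size the allocation past a page, whatever the server says */
        max_buf -= HDR_SIZE;
        if (max_buf > PAGE_SIZE)
                max_buf = PAGE_SIZE;
        return max_buf / ELEM_SIZE;
}

int main(void)
{
        assert(lock_elems_per_buf(16) == 0);    /* too small: reject */
        assert(lock_elems_per_buf(1u << 20) <= PAGE_SIZE / ELEM_SIZE);
        printf("elems for 64K buf: %u\n", lock_elems_per_buf(65536));
        return 0;
}
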
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 13fb59aadebc..478003644916 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2257,6 +2257,11 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
2257 * the flush returns error? 2257 * the flush returns error?
2258 */ 2258 */
2259 rc = filemap_write_and_wait(inode->i_mapping); 2259 rc = filemap_write_and_wait(inode->i_mapping);
2260 if (is_interrupt_error(rc)) {
2261 rc = -ERESTARTSYS;
2262 goto out;
2263 }
2264
2260 mapping_set_error(inode->i_mapping, rc); 2265 mapping_set_error(inode->i_mapping, rc);
2261 rc = 0; 2266 rc = 0;
2262 2267
@@ -2400,6 +2405,11 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2400 * the flush returns error? 2405 * the flush returns error?
2401 */ 2406 */
2402 rc = filemap_write_and_wait(inode->i_mapping); 2407 rc = filemap_write_and_wait(inode->i_mapping);
2408 if (is_interrupt_error(rc)) {
2409 rc = -ERESTARTSYS;
2410 goto cifs_setattr_exit;
2411 }
2412
2403 mapping_set_error(inode->i_mapping, rc); 2413 mapping_set_error(inode->i_mapping, rc);
2404 rc = 0; 2414 rc = 0;
2405 2415
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 4ed10dd086e6..b204e84b87fb 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -122,12 +122,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
122 122
123 /* 123 /*
124 * Accessing maxBuf is racy with cifs_reconnect - need to store value 124 * Accessing maxBuf is racy with cifs_reconnect - need to store value
125 * and check it for zero before using. 125 * and check it before using.
126 */ 126 */
127 max_buf = tcon->ses->server->maxBuf; 127 max_buf = tcon->ses->server->maxBuf;
128 if (!max_buf) 128 if (max_buf < sizeof(struct smb2_lock_element))
129 return -EINVAL; 129 return -EINVAL;
130 130
131 BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
132 max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
131 max_num = max_buf / sizeof(struct smb2_lock_element); 133 max_num = max_buf / sizeof(struct smb2_lock_element);
132 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); 134 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
133 if (!buf) 135 if (!buf)
@@ -264,6 +266,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
264 return -EINVAL; 266 return -EINVAL;
265 } 267 }
266 268
269 BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
270 max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
267 max_num = max_buf / sizeof(struct smb2_lock_element); 271 max_num = max_buf / sizeof(struct smb2_lock_element);
268 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); 272 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
269 if (!buf) { 273 if (!buf) {
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index f14533da3a93..01a76bccdb8d 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -293,6 +293,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
293 int rc; 293 int rc;
294 struct smb2_file_all_info *smb2_data; 294 struct smb2_file_all_info *smb2_data;
295 __u32 create_options = 0; 295 __u32 create_options = 0;
296 struct cifs_fid fid;
297 bool no_cached_open = tcon->nohandlecache;
296 298
297 *adjust_tz = false; 299 *adjust_tz = false;
298 *symlink = false; 300 *symlink = false;
@@ -301,6 +303,21 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
301 GFP_KERNEL); 303 GFP_KERNEL);
302 if (smb2_data == NULL) 304 if (smb2_data == NULL)
303 return -ENOMEM; 305 return -ENOMEM;
306
307 /* If it is a root and its handle is cached then use it */
308 if (!strlen(full_path) && !no_cached_open) {
309 rc = open_shroot(xid, tcon, &fid);
310 if (rc)
311 goto out;
312 rc = SMB2_query_info(xid, tcon, fid.persistent_fid,
313 fid.volatile_fid, smb2_data);
314 close_shroot(&tcon->crfid);
315 if (rc)
316 goto out;
317 move_smb2_info_to_cifs(data, smb2_data);
318 goto out;
319 }
320
304 if (backup_cred(cifs_sb)) 321 if (backup_cred(cifs_sb))
305 create_options |= CREATE_OPEN_BACKUP_INTENT; 322 create_options |= CREATE_OPEN_BACKUP_INTENT;
306 323
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 6a9c47541c53..7b8b58fb4d3f 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -648,6 +648,13 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
648 if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK) 648 if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK)
649 return false; 649 return false;
650 650
651 if (rsp->sync_hdr.CreditRequest) {
652 spin_lock(&server->req_lock);
653 server->credits += le16_to_cpu(rsp->sync_hdr.CreditRequest);
654 spin_unlock(&server->req_lock);
655 wake_up(&server->request_q);
656 }
657
651 if (rsp->StructureSize != 658 if (rsp->StructureSize !=
652 smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { 659 smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
653 if (le16_to_cpu(rsp->StructureSize) == 44) 660 if (le16_to_cpu(rsp->StructureSize) == 44)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index cf7eb891804f..6f96e2292856 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -34,6 +34,7 @@
34#include "cifs_ioctl.h" 34#include "cifs_ioctl.h"
35#include "smbdirect.h" 35#include "smbdirect.h"
36 36
37/* Change credits for different ops and return the total number of credits */
37static int 38static int
38change_conf(struct TCP_Server_Info *server) 39change_conf(struct TCP_Server_Info *server)
39{ 40{
@@ -41,17 +42,15 @@ change_conf(struct TCP_Server_Info *server)
41 server->oplock_credits = server->echo_credits = 0; 42 server->oplock_credits = server->echo_credits = 0;
42 switch (server->credits) { 43 switch (server->credits) {
43 case 0: 44 case 0:
44 return -1; 45 return 0;
45 case 1: 46 case 1:
46 server->echoes = false; 47 server->echoes = false;
47 server->oplocks = false; 48 server->oplocks = false;
48 cifs_dbg(VFS, "disabling echoes and oplocks\n");
49 break; 49 break;
50 case 2: 50 case 2:
51 server->echoes = true; 51 server->echoes = true;
52 server->oplocks = false; 52 server->oplocks = false;
53 server->echo_credits = 1; 53 server->echo_credits = 1;
54 cifs_dbg(FYI, "disabling oplocks\n");
55 break; 54 break;
56 default: 55 default:
57 server->echoes = true; 56 server->echoes = true;
@@ -64,14 +63,15 @@ change_conf(struct TCP_Server_Info *server)
64 server->echo_credits = 1; 63 server->echo_credits = 1;
65 } 64 }
66 server->credits -= server->echo_credits + server->oplock_credits; 65 server->credits -= server->echo_credits + server->oplock_credits;
67 return 0; 66 return server->credits + server->echo_credits + server->oplock_credits;
68} 67}
69 68
70static void 69static void
71smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, 70smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
72 const int optype) 71 const int optype)
73{ 72{
74 int *val, rc = 0; 73 int *val, rc = -1;
74
75 spin_lock(&server->req_lock); 75 spin_lock(&server->req_lock);
76 val = server->ops->get_credits_field(server, optype); 76 val = server->ops->get_credits_field(server, optype);
77 77
@@ -101,8 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
101 } 101 }
102 spin_unlock(&server->req_lock); 102 spin_unlock(&server->req_lock);
103 wake_up(&server->request_q); 103 wake_up(&server->request_q);
104 if (rc) 104
105 cifs_reconnect(server); 105 if (server->tcpStatus == CifsNeedReconnect)
106 return;
107
108 switch (rc) {
109 case -1:
110 /* change_conf hasn't been executed */
111 break;
112 case 0:
113 cifs_dbg(VFS, "Possible client or server bug - zero credits\n");
114 break;
115 case 1:
116 cifs_dbg(VFS, "disabling echoes and oplocks\n");
117 break;
118 case 2:
119 cifs_dbg(FYI, "disabling oplocks\n");
120 break;
121 default:
122 cifs_dbg(FYI, "add %u credits total=%d\n", add, rc);
123 }
106} 124}
107 125
108static void 126static void
@@ -136,7 +154,11 @@ smb2_get_credits(struct mid_q_entry *mid)
136{ 154{
137 struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf; 155 struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf;
138 156
139 return le16_to_cpu(shdr->CreditRequest); 157 if (mid->mid_state == MID_RESPONSE_RECEIVED
158 || mid->mid_state == MID_RESPONSE_MALFORMED)
159 return le16_to_cpu(shdr->CreditRequest);
160
161 return 0;
140} 162}
141 163
142static int 164static int
@@ -165,14 +187,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
165 187
166 scredits = server->credits; 188 scredits = server->credits;
167 /* can deadlock with reopen */ 189 /* can deadlock with reopen */
168 if (scredits == 1) { 190 if (scredits <= 8) {
169 *num = SMB2_MAX_BUFFER_SIZE; 191 *num = SMB2_MAX_BUFFER_SIZE;
170 *credits = 0; 192 *credits = 0;
171 break; 193 break;
172 } 194 }
173 195
174 /* leave one credit for a possible reopen */ 196 /* leave some credits for reopen and other ops */
175 scredits--; 197 scredits -= 8;
176 *num = min_t(unsigned int, size, 198 *num = min_t(unsigned int, size,
177 scredits * SMB2_MAX_BUFFER_SIZE); 199 scredits * SMB2_MAX_BUFFER_SIZE);
178 200
@@ -844,7 +866,9 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
844 FILE_READ_EA, 866 FILE_READ_EA,
845 FILE_FULL_EA_INFORMATION, 867 FILE_FULL_EA_INFORMATION,
846 SMB2_O_INFO_FILE, 868 SMB2_O_INFO_FILE,
847 SMB2_MAX_EA_BUF, 869 CIFSMaxBufSize -
870 MAX_SMB2_CREATE_RESPONSE_SIZE -
871 MAX_SMB2_CLOSE_RESPONSE_SIZE,
848 &rsp_iov, &buftype, cifs_sb); 872 &rsp_iov, &buftype, cifs_sb);
849 if (rc) { 873 if (rc) {
850 /* 874 /*
@@ -3189,11 +3213,23 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
3189 server->ops->is_status_pending(buf, server, 0)) 3213 server->ops->is_status_pending(buf, server, 0))
3190 return -1; 3214 return -1;
3191 3215
3192 rdata->result = server->ops->map_error(buf, false); 3216 /* set up first two iov to get credits */
3217 rdata->iov[0].iov_base = buf;
3218 rdata->iov[0].iov_len = 4;
3219 rdata->iov[1].iov_base = buf + 4;
3220 rdata->iov[1].iov_len =
3221 min_t(unsigned int, buf_len, server->vals->read_rsp_size) - 4;
3222 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
3223 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
3224 cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
3225 rdata->iov[1].iov_base, rdata->iov[1].iov_len);
3226
3227 rdata->result = server->ops->map_error(buf, true);
3193 if (rdata->result != 0) { 3228 if (rdata->result != 0) {
3194 cifs_dbg(FYI, "%s: server returned error %d\n", 3229 cifs_dbg(FYI, "%s: server returned error %d\n",
3195 __func__, rdata->result); 3230 __func__, rdata->result);
3196 dequeue_mid(mid, rdata->result); 3231 /* normal error on read response */
3232 dequeue_mid(mid, false);
3197 return 0; 3233 return 0;
3198 } 3234 }
3199 3235
@@ -3266,14 +3302,6 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
3266 return 0; 3302 return 0;
3267 } 3303 }
3268 3304
3269 /* set up first iov for signature check */
3270 rdata->iov[0].iov_base = buf;
3271 rdata->iov[0].iov_len = 4;
3272 rdata->iov[1].iov_base = buf + 4;
3273 rdata->iov[1].iov_len = server->vals->read_rsp_size - 4;
3274 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
3275 rdata->iov[0].iov_base, server->vals->read_rsp_size);
3276
3277 length = rdata->copy_into_pages(server, rdata, &iter); 3305 length = rdata->copy_into_pages(server, rdata, &iter);
3278 3306
3279 kfree(bvec); 3307 kfree(bvec);
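
In the smb2ops.c hunks above, change_conf() now reports the resulting credit total instead of an error, and smb2_add_credits() defers its logging until after req_lock is dropped; a total of zero no longer forces a reconnect from the credit path, since the new zero_credits() check in the demultiplex thread (connect.c above) handles that case. The threshold table it encodes, in sketch form with the reserved-credit bookkeeping simplified:

#include <stdbool.h>

struct conf {
        bool echoes, oplocks;
        int echo_credits, oplock_credits;
};

/* 0-1 credits: echoes and oplocks disabled; 2: echoes come back;
 * 3+: oplocks too.  Returns the total so the caller can log it later,
 * outside the lock. */
static int change_conf(struct conf *c, int credits)
{
        c->echo_credits = c->oplock_credits = 0;
        c->echoes = c->oplocks = false;
        switch (credits) {
        case 0:
        case 1:
                break;
        case 2:
                c->echoes = true;
                c->echo_credits = 1;
                break;
        default:
                c->echoes = true;
                c->oplocks = true;
                c->echo_credits = 1;
                c->oplock_credits = 1;
        }
        return credits;
}
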
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e57f6aa1d638..77b3aaa39b35 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -162,24 +162,31 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
162 int rc; 162 int rc;
163 struct dfs_cache_tgt_list tl; 163 struct dfs_cache_tgt_list tl;
164 struct dfs_cache_tgt_iterator *it = NULL; 164 struct dfs_cache_tgt_iterator *it = NULL;
165 char tree[MAX_TREE_SIZE + 1]; 165 char *tree;
166 const char *tcp_host; 166 const char *tcp_host;
167 size_t tcp_host_len; 167 size_t tcp_host_len;
168 const char *dfs_host; 168 const char *dfs_host;
169 size_t dfs_host_len; 169 size_t dfs_host_len;
170 170
171 tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
172 if (!tree)
173 return -ENOMEM;
174
171 if (tcon->ipc) { 175 if (tcon->ipc) {
172 snprintf(tree, sizeof(tree), "\\\\%s\\IPC$", 176 snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
173 tcon->ses->server->hostname); 177 tcon->ses->server->hostname);
174 return SMB2_tcon(0, tcon->ses, tree, tcon, nlsc); 178 rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
179 goto out;
175 } 180 }
176 181
177 if (!tcon->dfs_path) 182 if (!tcon->dfs_path) {
178 return SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc); 183 rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
184 goto out;
185 }
179 186
180 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl); 187 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
181 if (rc) 188 if (rc)
182 return rc; 189 goto out;
183 190
184 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host, 191 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
185 &tcp_host_len); 192 &tcp_host_len);
@@ -199,7 +206,7 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
199 continue; 206 continue;
200 } 207 }
201 208
202 snprintf(tree, sizeof(tree), "\\%s", tgt); 209 snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
203 210
204 rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc); 211 rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
205 if (!rc) 212 if (!rc)
@@ -216,6 +223,8 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
216 rc = -ENOENT; 223 rc = -ENOENT;
217 } 224 }
218 dfs_cache_free_tgts(&tl); 225 dfs_cache_free_tgts(&tl);
226out:
227 kfree(tree);
219 return rc; 228 return rc;
220} 229}
221#else 230#else
@@ -2807,6 +2816,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
2807 int resp_buftype = CIFS_NO_BUFFER; 2816 int resp_buftype = CIFS_NO_BUFFER;
2808 struct cifs_ses *ses = tcon->ses; 2817 struct cifs_ses *ses = tcon->ses;
2809 int flags = 0; 2818 int flags = 0;
2819 bool allocated = false;
2810 2820
2811 cifs_dbg(FYI, "Query Info\n"); 2821 cifs_dbg(FYI, "Query Info\n");
2812 2822
@@ -2846,14 +2856,21 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
2846 "Error %d allocating memory for acl\n", 2856 "Error %d allocating memory for acl\n",
2847 rc); 2857 rc);
2848 *dlen = 0; 2858 *dlen = 0;
2859 rc = -ENOMEM;
2849 goto qinf_exit; 2860 goto qinf_exit;
2850 } 2861 }
2862 allocated = true;
2851 } 2863 }
2852 } 2864 }
2853 2865
2854 rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset), 2866 rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
2855 le32_to_cpu(rsp->OutputBufferLength), 2867 le32_to_cpu(rsp->OutputBufferLength),
2856 &rsp_iov, min_len, *data); 2868 &rsp_iov, min_len, *data);
2869 if (rc && allocated) {
2870 kfree(*data);
2871 *data = NULL;
2872 *dlen = 0;
2873 }
2857 2874
2858qinf_exit: 2875qinf_exit:
2859 SMB2_query_info_free(&rqst); 2876 SMB2_query_info_free(&rqst);
@@ -2907,9 +2924,10 @@ smb2_echo_callback(struct mid_q_entry *mid)
2907{ 2924{
2908 struct TCP_Server_Info *server = mid->callback_data; 2925 struct TCP_Server_Info *server = mid->callback_data;
2909 struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf; 2926 struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf;
2910 unsigned int credits_received = 1; 2927 unsigned int credits_received = 0;
2911 2928
2912 if (mid->mid_state == MID_RESPONSE_RECEIVED) 2929 if (mid->mid_state == MID_RESPONSE_RECEIVED
2930 || mid->mid_state == MID_RESPONSE_MALFORMED)
2913 credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest); 2931 credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
2914 2932
2915 DeleteMidQEntry(mid); 2933 DeleteMidQEntry(mid);
@@ -3166,7 +3184,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
3166 struct TCP_Server_Info *server = tcon->ses->server; 3184 struct TCP_Server_Info *server = tcon->ses->server;
3167 struct smb2_sync_hdr *shdr = 3185 struct smb2_sync_hdr *shdr =
3168 (struct smb2_sync_hdr *)rdata->iov[0].iov_base; 3186 (struct smb2_sync_hdr *)rdata->iov[0].iov_base;
3169 unsigned int credits_received = 1; 3187 unsigned int credits_received = 0;
3170 struct smb_rqst rqst = { .rq_iov = rdata->iov, 3188 struct smb_rqst rqst = { .rq_iov = rdata->iov,
3171 .rq_nvec = 2, 3189 .rq_nvec = 2,
3172 .rq_pages = rdata->pages, 3190 .rq_pages = rdata->pages,
@@ -3205,6 +3223,9 @@ smb2_readv_callback(struct mid_q_entry *mid)
3205 task_io_account_read(rdata->got_bytes); 3223 task_io_account_read(rdata->got_bytes);
3206 cifs_stats_bytes_read(tcon, rdata->got_bytes); 3224 cifs_stats_bytes_read(tcon, rdata->got_bytes);
3207 break; 3225 break;
3226 case MID_RESPONSE_MALFORMED:
3227 credits_received = le16_to_cpu(shdr->CreditRequest);
3228 /* fall through */
3208 default: 3229 default:
3209 if (rdata->result != -ENODATA) 3230 if (rdata->result != -ENODATA)
3210 rdata->result = -EIO; 3231 rdata->result = -EIO;
@@ -3220,8 +3241,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
3220 rdata->mr = NULL; 3241 rdata->mr = NULL;
3221 } 3242 }
3222#endif 3243#endif
3223 if (rdata->result) 3244 if (rdata->result && rdata->result != -ENODATA) {
3224 cifs_stats_fail_inc(tcon, SMB2_READ_HE); 3245 cifs_stats_fail_inc(tcon, SMB2_READ_HE);
3246 trace_smb3_read_err(0 /* xid */,
3247 rdata->cfile->fid.persistent_fid,
3248 tcon->tid, tcon->ses->Suid, rdata->offset,
3249 rdata->bytes, rdata->result);
3250 } else
3251 trace_smb3_read_done(0 /* xid */,
3252 rdata->cfile->fid.persistent_fid,
3253 tcon->tid, tcon->ses->Suid,
3254 rdata->offset, rdata->got_bytes);
3225 3255
3226 queue_work(cifsiod_wq, &rdata->work); 3256 queue_work(cifsiod_wq, &rdata->work);
3227 DeleteMidQEntry(mid); 3257 DeleteMidQEntry(mid);
@@ -3278,12 +3308,14 @@ smb2_async_readv(struct cifs_readdata *rdata)
3278 if (rdata->credits) { 3308 if (rdata->credits) {
3279 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, 3309 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
3280 SMB2_MAX_BUFFER_SIZE)); 3310 SMB2_MAX_BUFFER_SIZE));
3281 shdr->CreditRequest = shdr->CreditCharge; 3311 shdr->CreditRequest =
3312 cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
3282 spin_lock(&server->req_lock); 3313 spin_lock(&server->req_lock);
3283 server->credits += rdata->credits - 3314 server->credits += rdata->credits -
3284 le16_to_cpu(shdr->CreditCharge); 3315 le16_to_cpu(shdr->CreditCharge);
3285 spin_unlock(&server->req_lock); 3316 spin_unlock(&server->req_lock);
3286 wake_up(&server->request_q); 3317 wake_up(&server->request_q);
3318 rdata->credits = le16_to_cpu(shdr->CreditCharge);
3287 flags |= CIFS_HAS_CREDITS; 3319 flags |= CIFS_HAS_CREDITS;
3288 } 3320 }
3289 3321
@@ -3294,13 +3326,11 @@ smb2_async_readv(struct cifs_readdata *rdata)
3294 if (rc) { 3326 if (rc) {
3295 kref_put(&rdata->refcount, cifs_readdata_release); 3327 kref_put(&rdata->refcount, cifs_readdata_release);
3296 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); 3328 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
3297 trace_smb3_read_err(rc, 0 /* xid */, io_parms.persistent_fid, 3329 trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid,
3298 io_parms.tcon->tid, io_parms.tcon->ses->Suid, 3330 io_parms.tcon->tid,
3299 io_parms.offset, io_parms.length); 3331 io_parms.tcon->ses->Suid,
3300 } else 3332 io_parms.offset, io_parms.length, rc);
3301 trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid, 3333 }
3302 io_parms.tcon->tid, io_parms.tcon->ses->Suid,
3303 io_parms.offset, io_parms.length);
3304 3334
3305 cifs_small_buf_release(buf); 3335 cifs_small_buf_release(buf);
3306 return rc; 3336 return rc;
@@ -3344,10 +3374,11 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
3344 if (rc != -ENODATA) { 3374 if (rc != -ENODATA) {
3345 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); 3375 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
3346 cifs_dbg(VFS, "Send error in read = %d\n", rc); 3376 cifs_dbg(VFS, "Send error in read = %d\n", rc);
3377 trace_smb3_read_err(xid, req->PersistentFileId,
3378 io_parms->tcon->tid, ses->Suid,
3379 io_parms->offset, io_parms->length,
3380 rc);
3347 } 3381 }
3348 trace_smb3_read_err(rc, xid, req->PersistentFileId,
3349 io_parms->tcon->tid, ses->Suid,
3350 io_parms->offset, io_parms->length);
3351 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 3382 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
3352 return rc == -ENODATA ? 0 : rc; 3383 return rc == -ENODATA ? 0 : rc;
3353 } else 3384 } else
@@ -3388,7 +3419,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
3388 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 3419 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
3389 unsigned int written; 3420 unsigned int written;
3390 struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; 3421 struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
3391 unsigned int credits_received = 1; 3422 unsigned int credits_received = 0;
3392 3423
3393 switch (mid->mid_state) { 3424 switch (mid->mid_state) {
3394 case MID_RESPONSE_RECEIVED: 3425 case MID_RESPONSE_RECEIVED:
@@ -3416,6 +3447,9 @@ smb2_writev_callback(struct mid_q_entry *mid)
3416 case MID_RETRY_NEEDED: 3447 case MID_RETRY_NEEDED:
3417 wdata->result = -EAGAIN; 3448 wdata->result = -EAGAIN;
3418 break; 3449 break;
3450 case MID_RESPONSE_MALFORMED:
3451 credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
3452 /* fall through */
3419 default: 3453 default:
3420 wdata->result = -EIO; 3454 wdata->result = -EIO;
3421 break; 3455 break;
@@ -3433,8 +3467,17 @@ smb2_writev_callback(struct mid_q_entry *mid)
3433 wdata->mr = NULL; 3467 wdata->mr = NULL;
3434 } 3468 }
3435#endif 3469#endif
3436 if (wdata->result) 3470 if (wdata->result) {
3437 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 3471 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
3472 trace_smb3_write_err(0 /* no xid */,
3473 wdata->cfile->fid.persistent_fid,
3474 tcon->tid, tcon->ses->Suid, wdata->offset,
3475 wdata->bytes, wdata->result);
3476 } else
3477 trace_smb3_write_done(0 /* no xid */,
3478 wdata->cfile->fid.persistent_fid,
3479 tcon->tid, tcon->ses->Suid,
3480 wdata->offset, wdata->bytes);
3438 3481
3439 queue_work(cifsiod_wq, &wdata->work); 3482 queue_work(cifsiod_wq, &wdata->work);
3440 DeleteMidQEntry(mid); 3483 DeleteMidQEntry(mid);
@@ -3555,12 +3598,14 @@ smb2_async_writev(struct cifs_writedata *wdata,
3555 if (wdata->credits) { 3598 if (wdata->credits) {
3556 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, 3599 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
3557 SMB2_MAX_BUFFER_SIZE)); 3600 SMB2_MAX_BUFFER_SIZE));
3558 shdr->CreditRequest = shdr->CreditCharge; 3601 shdr->CreditRequest =
3602 cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
3559 spin_lock(&server->req_lock); 3603 spin_lock(&server->req_lock);
3560 server->credits += wdata->credits - 3604 server->credits += wdata->credits -
3561 le16_to_cpu(shdr->CreditCharge); 3605 le16_to_cpu(shdr->CreditCharge);
3562 spin_unlock(&server->req_lock); 3606 spin_unlock(&server->req_lock);
3563 wake_up(&server->request_q); 3607 wake_up(&server->request_q);
3608 wdata->credits = le16_to_cpu(shdr->CreditCharge);
3564 flags |= CIFS_HAS_CREDITS; 3609 flags |= CIFS_HAS_CREDITS;
3565 } 3610 }
3566 3611
@@ -3574,10 +3619,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
3574 wdata->bytes, rc); 3619 wdata->bytes, rc);
3575 kref_put(&wdata->refcount, release); 3620 kref_put(&wdata->refcount, release);
3576 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 3621 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
3577 } else 3622 }
3578 trace_smb3_write_done(0 /* no xid */, req->PersistentFileId,
3579 tcon->tid, tcon->ses->Suid, wdata->offset,
3580 wdata->bytes);
3581 3623
3582async_writev_out: 3624async_writev_out:
3583 cifs_small_buf_release(req); 3625 cifs_small_buf_release(req);
@@ -3803,8 +3845,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
3803 rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { 3845 rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
3804 srch_inf->endOfSearch = true; 3846 srch_inf->endOfSearch = true;
3805 rc = 0; 3847 rc = 0;
3806 } 3848 } else
3807 cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); 3849 cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
3808 goto qdir_exit; 3850 goto qdir_exit;
3809 } 3851 }
3810 3852
@@ -4399,8 +4441,8 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
4399 rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); 4441 rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
4400 cifs_small_buf_release(req); 4442 cifs_small_buf_release(req);
4401 4443
4402 please_key_low = (__u64 *)req->LeaseKey; 4444 please_key_low = (__u64 *)lease_key;
4403 please_key_high = (__u64 *)(req->LeaseKey+8); 4445 please_key_high = (__u64 *)(lease_key+8);
4404 if (rc) { 4446 if (rc) {
4405 cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); 4447 cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
4406 trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid, 4448 trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid,
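
Two recurring themes in the smb2pdu.c hunks above: a malformed response still carries a valid header, so its CreditRequest is now collected before the mid is failed (the MID_RESPONSE_MALFORMED fall-throughs), and async reads and writes now ask for CreditCharge + 1 credits back, so a long run of large multi-credit I/O stops slowly draining the pool. The charge arithmetic, sketched:

#include <stdio.h>

#define SMB2_MAX_BUFFER_SIZE    65536u
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

/* One credit per 64KiB of payload; request one extra credit back so other
 * operations (echo, oplock break, reopen) are not starved. */
static unsigned int credit_request_for(unsigned int bytes)
{
        unsigned int charge = DIV_ROUND_UP(bytes, SMB2_MAX_BUFFER_SIZE);

        return charge + 1;
}

int main(void)
{
        printf("1MiB read: charge %u, request %u\n",
               DIV_ROUND_UP(1048576u, SMB2_MAX_BUFFER_SIZE),
               credit_request_for(1048576u));   /* charge 16, request 17 */
        return 0;
}
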
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 7a2d0a2255e6..538e2299805f 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -84,8 +84,9 @@
84 84
85#define NUMBER_OF_SMB2_COMMANDS 0x0013 85#define NUMBER_OF_SMB2_COMMANDS 0x0013
86 86
87/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ 87/* 52 transform hdr + 64 hdr + 88 create rsp */
88#define MAX_SMB2_HDR_SIZE 0x00b0 88#define SMB2_TRANSFORM_HEADER_SIZE 52
89#define MAX_SMB2_HDR_SIZE 204
89 90
90#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) 91#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
91#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) 92#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
@@ -648,6 +649,13 @@ struct smb2_create_req {
648 __u8 Buffer[0]; 649 __u8 Buffer[0];
649} __packed; 650} __packed;
650 651
652/*
653 * Maximum size of a SMB2_CREATE response is 64 (smb2 header) +
654 * 88 (fixed part of create response) + 520 (path) + 150 (contexts) +
655 * 2 bytes of padding.
656 */
657#define MAX_SMB2_CREATE_RESPONSE_SIZE 824
658
651struct smb2_create_rsp { 659struct smb2_create_rsp {
652 struct smb2_sync_hdr sync_hdr; 660 struct smb2_sync_hdr sync_hdr;
653 __le16 StructureSize; /* Must be 89 */ 661 __le16 StructureSize; /* Must be 89 */
@@ -996,6 +1004,11 @@ struct smb2_close_req {
996 __u64 VolatileFileId; /* opaque endianness */ 1004 __u64 VolatileFileId; /* opaque endianness */
997} __packed; 1005} __packed;
998 1006
1007/*
1008 * Maximum size of a SMB2_CLOSE response is 64 (smb2 header) + 60 (data)
1009 */
1010#define MAX_SMB2_CLOSE_RESPONSE_SIZE 124
1011
999struct smb2_close_rsp { 1012struct smb2_close_rsp {
1000 struct smb2_sync_hdr sync_hdr; 1013 struct smb2_sync_hdr sync_hdr;
1001 __le16 StructureSize; /* 60 */ 1014 __le16 StructureSize; /* 60 */
@@ -1398,8 +1411,6 @@ struct smb2_file_link_info { /* encoding of request for level 11 */
1398 char FileName[0]; /* Name to be assigned to new link */ 1411 char FileName[0]; /* Name to be assigned to new link */
1399} __packed; /* level 11 Set */ 1412} __packed; /* level 11 Set */
1400 1413
1401#define SMB2_MAX_EA_BUF 65536
1402
1403struct smb2_file_full_ea_info { /* encoding of response for level 15 */ 1414struct smb2_file_full_ea_info { /* encoding of response for level 15 */
1404 __le32 next_entry_offset; 1415 __le32 next_entry_offset;
1405 __u8 flags; 1416 __u8 flags;
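
The new smb2pdu.h constants above are plain sums over fixed wire-format sizes. Spelled out as compile-time checks (C11 _Static_assert; the component sizes are taken from the comments in the hunks):

/* 52-byte transform header + 64-byte SMB2 header + 88-byte fixed part of
 * the create response; worst-case create and close responses as per the
 * comments above. */
#define XFORM_HDR_SIZE   52
#define SMB2_HDR_SIZE    64
#define CREATE_RSP_FIXED 88

_Static_assert(XFORM_HDR_SIZE + SMB2_HDR_SIZE + CREATE_RSP_FIXED == 204,
               "MAX_SMB2_HDR_SIZE");
_Static_assert(SMB2_HDR_SIZE + CREATE_RSP_FIXED + 520 + 150 + 2 == 824,
               "MAX_SMB2_CREATE_RESPONSE_SIZE");
_Static_assert(SMB2_HDR_SIZE + 60 == 124,
               "MAX_SMB2_CLOSE_RESPONSE_SIZE");
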
diff --git a/fs/cifs/trace.c b/fs/cifs/trace.c
index bd4a546feec1..465483787193 100644
--- a/fs/cifs/trace.c
+++ b/fs/cifs/trace.c
@@ -3,16 +3,6 @@
3 * Copyright (C) 2018, Microsoft Corporation. 3 * Copyright (C) 2018, Microsoft Corporation.
4 * 4 *
5 * Author(s): Steve French <stfrench@microsoft.com> 5 * Author(s): Steve French <stfrench@microsoft.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU General Public License for more details.
16 */ 6 */
17#define CREATE_TRACE_POINTS 7#define CREATE_TRACE_POINTS
18#include "trace.h" 8#include "trace.h"
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index fb049809555f..59be48206932 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -3,16 +3,6 @@
3 * Copyright (C) 2018, Microsoft Corporation. 3 * Copyright (C) 2018, Microsoft Corporation.
4 * 4 *
5 * Author(s): Steve French <stfrench@microsoft.com> 5 * Author(s): Steve French <stfrench@microsoft.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU General Public License for more details.
16 */ 6 */
17#undef TRACE_SYSTEM 7#undef TRACE_SYSTEM
18#define TRACE_SYSTEM cifs 8#define TRACE_SYSTEM cifs
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 5be7302853b6..53532bd3f50d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -387,7 +387,7 @@ smbd_done:
387 if (rc < 0 && rc != -EINTR) 387 if (rc < 0 && rc != -EINTR)
388 cifs_dbg(VFS, "Error %d sending data on socket to server\n", 388 cifs_dbg(VFS, "Error %d sending data on socket to server\n",
389 rc); 389 rc);
390 else 390 else if (rc > 0)
391 rc = 0; 391 rc = 0;
392 392
393 return rc; 393 return rc;
@@ -783,8 +783,25 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
783} 783}
784 784
785static void 785static void
786cifs_noop_callback(struct mid_q_entry *mid) 786cifs_compound_callback(struct mid_q_entry *mid)
787{
788 struct TCP_Server_Info *server = mid->server;
789
790 add_credits(server, server->ops->get_credits(mid), mid->optype);
791}
792
793static void
794cifs_compound_last_callback(struct mid_q_entry *mid)
787{ 795{
796 cifs_compound_callback(mid);
797 cifs_wake_up_task(mid);
798}
799
800static void
801cifs_cancelled_callback(struct mid_q_entry *mid)
802{
803 cifs_compound_callback(mid);
804 DeleteMidQEntry(mid);
788} 805}
789 806
790int 807int
@@ -795,7 +812,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
795 int i, j, rc = 0; 812 int i, j, rc = 0;
796 int timeout, optype; 813 int timeout, optype;
797 struct mid_q_entry *midQ[MAX_COMPOUND]; 814 struct mid_q_entry *midQ[MAX_COMPOUND];
798 unsigned int credits = 0; 815 bool cancelled_mid[MAX_COMPOUND] = {false};
816 unsigned int credits[MAX_COMPOUND] = {0};
799 char *buf; 817 char *buf;
800 818
801 timeout = flags & CIFS_TIMEOUT_MASK; 819 timeout = flags & CIFS_TIMEOUT_MASK;
@@ -813,13 +831,31 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
813 return -ENOENT; 831 return -ENOENT;
814 832
815 /* 833 /*
816 * Ensure that we do not send more than 50 overlapping requests 834 * Ensure we obtain 1 credit per request in the compound chain.
817 * to the same server. We may make this configurable later or 835 * It can be optimized further by waiting for all the credits
818 * use ses->maxReq. 836 * at once but this can wait long enough if we don't have enough
837 * credits due to some heavy operations in progress or the server
838 * not granting us much, so a fallback to the current approach is
839 * needed anyway.
819 */ 840 */
820 rc = wait_for_free_request(ses->server, timeout, optype); 841 for (i = 0; i < num_rqst; i++) {
821 if (rc) 842 rc = wait_for_free_request(ses->server, timeout, optype);
822 return rc; 843 if (rc) {
844 /*
845 * We haven't sent an SMB packet to the server yet but
846 * we already obtained credits for i requests in the
847 * compound chain - need to return those credits back
848 * for future use. Note that we need to call add_credits
849 * multiple times to match the way we obtained credits
850 * in the first place and to account for in flight
851 * requests correctly.
852 */
853 for (j = 0; j < i; j++)
854 add_credits(ses->server, 1, optype);
855 return rc;
856 }
857 credits[i] = 1;
858 }
823 859
824 /* 860 /*
825 * Make sure that we sign in the same order that we send on this socket 861 * Make sure that we sign in the same order that we send on this socket
@@ -835,18 +871,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
835 for (j = 0; j < i; j++) 871 for (j = 0; j < i; j++)
836 cifs_delete_mid(midQ[j]); 872 cifs_delete_mid(midQ[j]);
837 mutex_unlock(&ses->server->srv_mutex); 873 mutex_unlock(&ses->server->srv_mutex);
874
838 /* Update # of requests on wire to server */ 875 /* Update # of requests on wire to server */
839 add_credits(ses->server, 1, optype); 876 for (j = 0; j < num_rqst; j++)
877 add_credits(ses->server, credits[j], optype);
840 return PTR_ERR(midQ[i]); 878 return PTR_ERR(midQ[i]);
841 } 879 }
842 880
843 midQ[i]->mid_state = MID_REQUEST_SUBMITTED; 881 midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
882 midQ[i]->optype = optype;
844 /* 883 /*
845 * We don't invoke the callback compounds unless it is the last 884 * Invoke callback for every part of the compound chain
846 * request. 885 * to calculate credits properly. Wake up this thread only when
886 * the last element is received.
847 */ 887 */
848 if (i < num_rqst - 1) 888 if (i < num_rqst - 1)
849 midQ[i]->callback = cifs_noop_callback; 889 midQ[i]->callback = cifs_compound_callback;
890 else
891 midQ[i]->callback = cifs_compound_last_callback;
850 } 892 }
851 cifs_in_send_inc(ses->server); 893 cifs_in_send_inc(ses->server);
852 rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); 894 rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
@@ -860,8 +902,20 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
860 902
861 mutex_unlock(&ses->server->srv_mutex); 903 mutex_unlock(&ses->server->srv_mutex);
862 904
863 if (rc < 0) 905 if (rc < 0) {
906 /* Sending failed for some reason - return credits back */
907 for (i = 0; i < num_rqst; i++)
908 add_credits(ses->server, credits[i], optype);
864 goto out; 909 goto out;
910 }
911
912 /*
913 * At this point the request is passed to the network stack - we assume
914 * that any credits taken from the server structure on the client have
915 * been spent and we can't return them back. Once we receive responses
916 * we will collect credits granted by the server in the mid callbacks
917 * and add those credits to the server structure.
918 */
865 919
866 /* 920 /*
867 * Compounding is never used during session establish. 921 * Compounding is never used during session establish.
@@ -875,36 +929,34 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
875 929
876 for (i = 0; i < num_rqst; i++) { 930 for (i = 0; i < num_rqst; i++) {
877 rc = wait_for_response(ses->server, midQ[i]); 931 rc = wait_for_response(ses->server, midQ[i]);
878 if (rc != 0) { 932 if (rc != 0)
933 break;
934 }
935 if (rc != 0) {
936 for (; i < num_rqst; i++) {
879 cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n", 937 cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
880 midQ[i]->mid, le16_to_cpu(midQ[i]->command)); 938 midQ[i]->mid, le16_to_cpu(midQ[i]->command));
881 send_cancel(ses->server, &rqst[i], midQ[i]); 939 send_cancel(ses->server, &rqst[i], midQ[i]);
882 spin_lock(&GlobalMid_Lock); 940 spin_lock(&GlobalMid_Lock);
883 if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { 941 if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
884 midQ[i]->mid_flags |= MID_WAIT_CANCELLED; 942 midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
885 midQ[i]->callback = DeleteMidQEntry; 943 midQ[i]->callback = cifs_cancelled_callback;
886 spin_unlock(&GlobalMid_Lock); 944 cancelled_mid[i] = true;
887 add_credits(ses->server, 1, optype); 945 credits[i] = 0;
888 return rc;
889 } 946 }
890 spin_unlock(&GlobalMid_Lock); 947 spin_unlock(&GlobalMid_Lock);
891 } 948 }
892 } 949 }
893 950
894 for (i = 0; i < num_rqst; i++)
895 if (midQ[i]->resp_buf)
896 credits += ses->server->ops->get_credits(midQ[i]);
897 if (!credits)
898 credits = 1;
899
900 for (i = 0; i < num_rqst; i++) { 951 for (i = 0; i < num_rqst; i++) {
901 if (rc < 0) 952 if (rc < 0)
902 goto out; 953 goto out;
903 954
904 rc = cifs_sync_mid_result(midQ[i], ses->server); 955 rc = cifs_sync_mid_result(midQ[i], ses->server);
905 if (rc != 0) { 956 if (rc != 0) {
906 add_credits(ses->server, credits, optype); 957 /* mark this mid as cancelled to not free it below */
907 return rc; 958 cancelled_mid[i] = true;
959 goto out;
908 } 960 }
909 961
910 if (!midQ[i]->resp_buf || 962 if (!midQ[i]->resp_buf ||
@@ -951,9 +1003,10 @@ out:
951 * This is prevented above by using a noop callback that will not 1003 * This is prevented above by using a noop callback that will not
952 * wake this thread except for the very last PDU. 1004 * wake this thread except for the very last PDU.
953 */ 1005 */
954 for (i = 0; i < num_rqst; i++) 1006 for (i = 0; i < num_rqst; i++) {
955 cifs_delete_mid(midQ[i]); 1007 if (!cancelled_mid[i])
956 add_credits(ses->server, credits, optype); 1008 cifs_delete_mid(midQ[i]);
1009 }
957 1010
958 return rc; 1011 return rc;
959} 1012}
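
The transport.c rework gives compound_send_recv() exact credit accounting: one credit is reserved per request up front, every mid's callback (not just the last) returns the credits its response granted, and each failure path hands back only what is still held. The reservation and rollback step, as a sketch; take_credit() and give_credit() are hypothetical stand-ins for wait_for_free_request() and add_credits():

#define MAX_COMPOUND 5

/* Hypothetical stand-ins for wait_for_free_request()/add_credits(). */
extern int  take_credit(void);
extern void give_credit(void);

/* Obtain one credit per request up front; on failure, return every credit
 * already taken, one call per credit, so in-flight accounting stays
 * balanced with the way the credits were obtained. */
static int reserve_compound_credits(int num_rqst,
                                    unsigned int credits[MAX_COMPOUND])
{
        int i, j, rc;

        for (i = 0; i < num_rqst; i++) {
                rc = take_credit();
                if (rc) {
                        for (j = 0; j < i; j++)
                                give_credit();
                        return rc;
                }
                credits[i] = 1;
        }
        return 0;
}
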
diff --git a/fs/dcache.c b/fs/dcache.c
index 2593153471cf..aac41adf4743 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -119,6 +119,7 @@ struct dentry_stat_t dentry_stat = {
119 119
120static DEFINE_PER_CPU(long, nr_dentry); 120static DEFINE_PER_CPU(long, nr_dentry);
121static DEFINE_PER_CPU(long, nr_dentry_unused); 121static DEFINE_PER_CPU(long, nr_dentry_unused);
122static DEFINE_PER_CPU(long, nr_dentry_negative);
122 123
123#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 124#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
124 125
@@ -152,11 +153,22 @@ static long get_nr_dentry_unused(void)
152 return sum < 0 ? 0 : sum; 153 return sum < 0 ? 0 : sum;
153} 154}
154 155
156static long get_nr_dentry_negative(void)
157{
158 int i;
159 long sum = 0;
160
161 for_each_possible_cpu(i)
162 sum += per_cpu(nr_dentry_negative, i);
163 return sum < 0 ? 0 : sum;
164}
165
155int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, 166int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
156 size_t *lenp, loff_t *ppos) 167 size_t *lenp, loff_t *ppos)
157{ 168{
158 dentry_stat.nr_dentry = get_nr_dentry(); 169 dentry_stat.nr_dentry = get_nr_dentry();
159 dentry_stat.nr_unused = get_nr_dentry_unused(); 170 dentry_stat.nr_unused = get_nr_dentry_unused();
171 dentry_stat.nr_negative = get_nr_dentry_negative();
160 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 172 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
161} 173}
162#endif 174#endif
@@ -317,6 +329,8 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
317 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); 329 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
318 WRITE_ONCE(dentry->d_flags, flags); 330 WRITE_ONCE(dentry->d_flags, flags);
319 dentry->d_inode = NULL; 331 dentry->d_inode = NULL;
332 if (dentry->d_flags & DCACHE_LRU_LIST)
333 this_cpu_inc(nr_dentry_negative);
320} 334}
321 335
322static void dentry_free(struct dentry *dentry) 336static void dentry_free(struct dentry *dentry)
@@ -371,6 +385,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
371 * The per-cpu "nr_dentry_unused" counters are updated with 385 * The per-cpu "nr_dentry_unused" counters are updated with
372 * the DCACHE_LRU_LIST bit. 386 * the DCACHE_LRU_LIST bit.
373 * 387 *
388 * The per-cpu "nr_dentry_negative" counters are only updated
389 * when deleted from or added to the per-superblock LRU list, not
390 * from/to the shrink list. That is to avoid an unneeded dec/inc
391 * pair when moving from LRU to shrink list in select_collect().
392 *
374 * These helper functions make sure we always follow the 393 * These helper functions make sure we always follow the
375 * rules. d_lock must be held by the caller. 394 * rules. d_lock must be held by the caller.
376 */ 395 */
@@ -380,6 +399,8 @@ static void d_lru_add(struct dentry *dentry)
380 D_FLAG_VERIFY(dentry, 0); 399 D_FLAG_VERIFY(dentry, 0);
381 dentry->d_flags |= DCACHE_LRU_LIST; 400 dentry->d_flags |= DCACHE_LRU_LIST;
382 this_cpu_inc(nr_dentry_unused); 401 this_cpu_inc(nr_dentry_unused);
402 if (d_is_negative(dentry))
403 this_cpu_inc(nr_dentry_negative);
383 WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); 404 WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
384} 405}
385 406
@@ -388,6 +409,8 @@ static void d_lru_del(struct dentry *dentry)
388 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 409 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
389 dentry->d_flags &= ~DCACHE_LRU_LIST; 410 dentry->d_flags &= ~DCACHE_LRU_LIST;
390 this_cpu_dec(nr_dentry_unused); 411 this_cpu_dec(nr_dentry_unused);
412 if (d_is_negative(dentry))
413 this_cpu_dec(nr_dentry_negative);
391 WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); 414 WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
392} 415}
393 416
@@ -418,6 +441,8 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
418 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 441 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
419 dentry->d_flags &= ~DCACHE_LRU_LIST; 442 dentry->d_flags &= ~DCACHE_LRU_LIST;
420 this_cpu_dec(nr_dentry_unused); 443 this_cpu_dec(nr_dentry_unused);
444 if (d_is_negative(dentry))
445 this_cpu_dec(nr_dentry_negative);
421 list_lru_isolate(lru, &dentry->d_lru); 446 list_lru_isolate(lru, &dentry->d_lru);
422} 447}
423 448
@@ -426,6 +451,8 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
426{ 451{
427 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 452 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
428 dentry->d_flags |= DCACHE_SHRINK_LIST; 453 dentry->d_flags |= DCACHE_SHRINK_LIST;
454 if (d_is_negative(dentry))
455 this_cpu_dec(nr_dentry_negative);
429 list_lru_isolate_move(lru, &dentry->d_lru, list); 456 list_lru_isolate_move(lru, &dentry->d_lru, list);
430} 457}
431 458
@@ -1188,15 +1215,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
1188 */ 1215 */
1189void shrink_dcache_sb(struct super_block *sb) 1216void shrink_dcache_sb(struct super_block *sb)
1190{ 1217{
1191 long freed;
1192
1193 do { 1218 do {
1194 LIST_HEAD(dispose); 1219 LIST_HEAD(dispose);
1195 1220
1196 freed = list_lru_walk(&sb->s_dentry_lru, 1221 list_lru_walk(&sb->s_dentry_lru,
1197 dentry_lru_isolate_shrink, &dispose, 1024); 1222 dentry_lru_isolate_shrink, &dispose, 1024);
1198
1199 this_cpu_sub(nr_dentry_unused, freed);
1200 shrink_dentry_list(&dispose); 1223 shrink_dentry_list(&dispose);
1201 } while (list_lru_count(&sb->s_dentry_lru) > 0); 1224 } while (list_lru_count(&sb->s_dentry_lru) > 0);
1202} 1225}
@@ -1820,6 +1843,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1820 WARN_ON(d_in_lookup(dentry)); 1843 WARN_ON(d_in_lookup(dentry));
1821 1844
1822 spin_lock(&dentry->d_lock); 1845 spin_lock(&dentry->d_lock);
1846 /*
1847 * Decrement negative dentry count if it was in the LRU list.
1848 */
1849 if (dentry->d_flags & DCACHE_LRU_LIST)
1850 this_cpu_dec(nr_dentry_negative);
1823 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1851 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1824 raw_write_seqcount_begin(&dentry->d_seq); 1852 raw_write_seqcount_begin(&dentry->d_seq);
1825 __d_set_inode_and_type(dentry, inode, add_flags); 1853 __d_set_inode_and_type(dentry, inode, add_flags);
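
The dcache.c hunks above add nr_dentry_negative alongside the existing per-cpu counters: lockless per-cpu increments and decrements, summed on demand and clamped at zero, because a dentry counted up on one CPU may be counted down on another, leaving individual slots transiently negative. The summing side, as a userspace sketch:

#include <stdio.h>

#define NR_CPUS 4

/* Per-cpu style counters: each CPU only touches its own slot, so the
 * increment/decrement path needs no lock. */
static long nr_dentry_negative[NR_CPUS];

/* Sum across CPUs; individual slots may be negative, so clamp the total. */
static long get_nr_dentry_negative(void)
{
        long sum = 0;
        int i;

        for (i = 0; i < NR_CPUS; i++)
                sum += nr_dentry_negative[i];
        return sum < 0 ? 0 : sum;
}

int main(void)
{
        nr_dentry_negative[0] = 10;
        nr_dentry_negative[1] = -3;     /* decremented on another CPU */
        printf("negative dentries: %ld\n", get_nr_dentry_negative());
        return 0;
}
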
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 13b01351dd1c..29c68c5d44d5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -324,7 +324,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
324 inode_unlock(d_inode(dentry->d_parent)); 324 inode_unlock(d_inode(dentry->d_parent));
325 dput(dentry); 325 dput(dentry);
326 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 326 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
327 return NULL; 327 return ERR_PTR(-ENOMEM);
328} 328}
329 329
330static struct dentry *end_creating(struct dentry *dentry) 330static struct dentry *end_creating(struct dentry *dentry)
@@ -347,7 +347,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
347 dentry = start_creating(name, parent); 347 dentry = start_creating(name, parent);
348 348
349 if (IS_ERR(dentry)) 349 if (IS_ERR(dentry))
350 return NULL; 350 return dentry;
351 351
352 inode = debugfs_get_inode(dentry->d_sb); 352 inode = debugfs_get_inode(dentry->d_sb);
353 if (unlikely(!inode)) 353 if (unlikely(!inode))
@@ -386,7 +386,8 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
386 * This function will return a pointer to a dentry if it succeeds. This 386 * This function will return a pointer to a dentry if it succeeds. This
387 * pointer must be passed to the debugfs_remove() function when the file is 387 * pointer must be passed to the debugfs_remove() function when the file is
388 * to be removed (no automatic cleanup happens if your module is unloaded, 388 * to be removed (no automatic cleanup happens if your module is unloaded,
389 * you are responsible here.) If an error occurs, %NULL will be returned. 389 * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
390 * returned.
390 * 391 *
391 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 392 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
392 * returned. 393 * returned.
@@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe);
464 * This function will return a pointer to a dentry if it succeeds. This 465 * This function will return a pointer to a dentry if it succeeds. This
465 * pointer must be passed to the debugfs_remove() function when the file is 466 * pointer must be passed to the debugfs_remove() function when the file is
466 * to be removed (no automatic cleanup happens if your module is unloaded, 467 * to be removed (no automatic cleanup happens if your module is unloaded,
467 * you are responsible here.) If an error occurs, %NULL will be returned. 468 * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
469 * returned.
468 * 470 *
469 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 471 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
470 * returned. 472 * returned.
@@ -495,7 +497,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_size);
495 * This function will return a pointer to a dentry if it succeeds. This 497 * This function will return a pointer to a dentry if it succeeds. This
496 * pointer must be passed to the debugfs_remove() function when the file is 498 * pointer must be passed to the debugfs_remove() function when the file is
497 * to be removed (no automatic cleanup happens if your module is unloaded, 499 * to be removed (no automatic cleanup happens if your module is unloaded,
498 * you are responsible here.) If an error occurs, %NULL will be returned. 500 * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
501 * returned.
499 * 502 *
500 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 503 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
501 * returned. 504 * returned.
@@ -506,7 +509,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
506 struct inode *inode; 509 struct inode *inode;
507 510
508 if (IS_ERR(dentry)) 511 if (IS_ERR(dentry))
509 return NULL; 512 return dentry;
510 513
511 inode = debugfs_get_inode(dentry->d_sb); 514 inode = debugfs_get_inode(dentry->d_sb);
512 if (unlikely(!inode)) 515 if (unlikely(!inode))
@@ -545,7 +548,7 @@ struct dentry *debugfs_create_automount(const char *name,
545 struct inode *inode; 548 struct inode *inode;
546 549
547 if (IS_ERR(dentry)) 550 if (IS_ERR(dentry))
548 return NULL; 551 return dentry;
549 552
550 inode = debugfs_get_inode(dentry->d_sb); 553 inode = debugfs_get_inode(dentry->d_sb);
551 if (unlikely(!inode)) 554 if (unlikely(!inode))
@@ -581,8 +584,8 @@ EXPORT_SYMBOL(debugfs_create_automount);
581 * This function will return a pointer to a dentry if it succeeds. This 584 * This function will return a pointer to a dentry if it succeeds. This
582 * pointer must be passed to the debugfs_remove() function when the symbolic 585 * pointer must be passed to the debugfs_remove() function when the symbolic
583 * link is to be removed (no automatic cleanup happens if your module is 586 * link is to be removed (no automatic cleanup happens if your module is
584 * unloaded, you are responsible here.) If an error occurs, %NULL will be 587 * unloaded, you are responsible here.) If an error occurs, %ERR_PTR(-ERROR)
585 * returned. 588 * will be returned.
586 * 589 *
587 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 590 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
588 * returned. 591 * returned.
@@ -594,12 +597,12 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
594 struct inode *inode; 597 struct inode *inode;
595 char *link = kstrdup(target, GFP_KERNEL); 598 char *link = kstrdup(target, GFP_KERNEL);
596 if (!link) 599 if (!link)
597 return NULL; 600 return ERR_PTR(-ENOMEM);
598 601
599 dentry = start_creating(name, parent); 602 dentry = start_creating(name, parent);
600 if (IS_ERR(dentry)) { 603 if (IS_ERR(dentry)) {
601 kfree(link); 604 kfree(link);
602 return NULL; 605 return dentry;
603 } 606 }
604 607
605 inode = debugfs_get_inode(dentry->d_sb); 608 inode = debugfs_get_inode(dentry->d_sb);
@@ -787,6 +790,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
787 struct dentry *dentry = NULL, *trap; 790 struct dentry *dentry = NULL, *trap;
788 struct name_snapshot old_name; 791 struct name_snapshot old_name;
789 792
793 if (IS_ERR(old_dir))
794 return old_dir;
795 if (IS_ERR(new_dir))
796 return new_dir;
797 if (IS_ERR_OR_NULL(old_dentry))
798 return old_dentry;
799
790 trap = lock_rename(new_dir, old_dir); 800 trap = lock_rename(new_dir, old_dir);
791 /* Source or destination directories don't exist? */ 801 /* Source or destination directories don't exist? */
792 if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) 802 if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
@@ -820,7 +830,9 @@ exit:
820 if (dentry && !IS_ERR(dentry)) 830 if (dentry && !IS_ERR(dentry))
821 dput(dentry); 831 dput(dentry);
822 unlock_rename(new_dir, old_dir); 832 unlock_rename(new_dir, old_dir);
823 return NULL; 833 if (IS_ERR(dentry))
834 return dentry;
835 return ERR_PTR(-EINVAL);
824} 836}
825EXPORT_SYMBOL_GPL(debugfs_rename); 837EXPORT_SYMBOL_GPL(debugfs_rename);
826 838
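
The debugfs conversion above replaces ambiguous NULL returns with encoded error pointers so callers can see and propagate the real errno. A self-contained userspace sketch of the ERR_PTR/IS_ERR convention follows, assuming only that error codes fit in the top 4095 pointer values (the same assumption the kernel makes); create_file() is a hypothetical stand-in for the debugfs creation helpers.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static int object;

static void *create_file(int fail)
{
	if (fail)
		return ERR_PTR(-ENOMEM);	/* was: return NULL */
	return &object;
}

int main(void)
{
	void *d = create_file(1);

	if (IS_ERR(d))	/* caller sees -ENOMEM, not an ambiguous NULL */
		printf("error: %ld\n", PTR_ERR(d));
	return 0;
}
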
diff --git a/fs/direct-io.c b/fs/direct-io.c
index dbc1a1f080ce..ec2fb6fe6d37 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -679,6 +679,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
679 unsigned long fs_count; /* Number of filesystem-sized blocks */ 679 unsigned long fs_count; /* Number of filesystem-sized blocks */
680 int create; 680 int create;
681 unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; 681 unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;
682 loff_t i_size;
682 683
683 /* 684 /*
684 * If there was a memory error and we've overwritten all the 685 * If there was a memory error and we've overwritten all the
@@ -708,8 +709,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
708 */ 709 */
709 create = dio->op == REQ_OP_WRITE; 710 create = dio->op == REQ_OP_WRITE;
710 if (dio->flags & DIO_SKIP_HOLES) { 711 if (dio->flags & DIO_SKIP_HOLES) {
711 if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> 712 i_size = i_size_read(dio->inode);
712 i_blkbits)) 713 if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits)
713 create = 0; 714 create = 0;
714 } 715 }
715 716
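
The one-line guard above matters because of integer promotion: with i_size == 0 the old expression evaluated to -1, and comparing an unsigned block number against it promoted -1 to the maximum unsigned value, so the hole check always fired and direct writes to empty files never allocated blocks. A standalone arithmetic demo of that promotion (typical 64-bit build assumed; this is not kernel code):

#include <stdio.h>

int main(void)
{
	long long i_size = 0;		/* models loff_t from i_size_read() */
	unsigned long fs_startblk = 5;	/* models the unsigned block number */
	int i_blkbits = 9;

	/* old check: -1 >> 9 is -1, promoted to the max unsigned value by
	 * the mixed comparison, so this is true for any fs_startblk */
	if (fs_startblk <= (i_size - 1) >> i_blkbits)
		printf("old check: write treated as overwrite (bug)\n");

	/* new check: empty files fall through and allocate blocks */
	if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits)
		printf("new check: overwrite\n");
	else
		printf("new check: allocate (correct for i_size == 0)\n");
	return 0;
}
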
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 82377017130f..d31b6c72b476 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
21 spin_lock(&sb->s_inode_list_lock); 21 spin_lock(&sb->s_inode_list_lock);
22 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 22 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
23 spin_lock(&inode->i_lock); 23 spin_lock(&inode->i_lock);
24 /*
 25 * We must skip inodes in an unusual state. We could also skip
 26 * inodes without pages, but we deliberately don't: falling through
 27 * keeps the cond_resched() below reachable and avoids softlockups.
28 */
24 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 29 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
25 (inode->i_mapping->nrpages == 0)) { 30 (inode->i_mapping->nrpages == 0 && !need_resched())) {
26 spin_unlock(&inode->i_lock); 31 spin_unlock(&inode->i_lock);
27 continue; 32 continue;
28 } 33 }
@@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
30 spin_unlock(&inode->i_lock); 35 spin_unlock(&inode->i_lock);
31 spin_unlock(&sb->s_inode_list_lock); 36 spin_unlock(&sb->s_inode_list_lock);
32 37
38 cond_resched();
33 invalidate_mapping_pages(inode->i_mapping, 0, -1); 39 invalidate_mapping_pages(inode->i_mapping, 0, -1);
34 iput(toput_inode); 40 iput(toput_inode);
35 toput_inode = inode; 41 toput_inode = inode;
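
The drop_caches change above is about keeping the reschedule point reachable: a list made up entirely of page-less inodes used to short-circuit every iteration and never yield. A userspace model of the control flow, with need_resched() faked and sched_yield() standing in for cond_resched() (illustrative only):

#include <sched.h>
#include <stdio.h>

static int need_resched(void) { return 1; }	/* pretend one is always due */

int main(void)
{
	int yields = 0;

	for (int i = 0; i < 1000; i++) {
		int empty = 1;	/* inode->i_mapping->nrpages == 0 */

		if (empty && !need_resched())
			continue;	/* old code took this unconditionally */

		sched_yield();	/* stands in for cond_resched() */
		yields++;	/* invalidate_mapping_pages(...) would go here */
	}
	printf("reached the resched point %d times\n", yields);
	return 0;
}
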
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 712f00995390..5508baa11bb6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
116 goto out; 116 goto out;
117 } 117 }
118 118
119 ret = file_write_and_wait_range(file, start, end);
120 if (ret)
121 return ret;
122
123 if (!journal) { 119 if (!journal) {
124 struct writeback_control wbc = { 120 ret = __generic_file_fsync(file, start, end, datasync);
125 .sync_mode = WB_SYNC_ALL
126 };
127
128 ret = ext4_write_inode(inode, &wbc);
129 if (!ret) 121 if (!ret)
130 ret = ext4_sync_parent(inode); 122 ret = ext4_sync_parent(inode);
131 if (test_opt(inode->i_sb, BARRIER)) 123 if (test_opt(inode->i_sb, BARRIER))
@@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
133 goto out; 125 goto out;
134 } 126 }
135 127
128 ret = file_write_and_wait_range(file, start, end);
129 if (ret)
130 return ret;
136 /* 131 /*
137 * data=writeback,ordered: 132 * data=writeback,ordered:
138 * The caller's filemap_fdatawrite()/wait will sync the data. 133 * The caller's filemap_fdatawrite()/wait will sync the data.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b40168fcc94a..36855c1f8daf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
331 struct work_struct work; 331 struct work_struct work;
332}; 332};
333 333
334static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
335{
336 down_write(&bdi->wb_switch_rwsem);
337}
338
339static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
340{
341 up_write(&bdi->wb_switch_rwsem);
342}
343
334static void inode_switch_wbs_work_fn(struct work_struct *work) 344static void inode_switch_wbs_work_fn(struct work_struct *work)
335{ 345{
336 struct inode_switch_wbs_context *isw = 346 struct inode_switch_wbs_context *isw =
337 container_of(work, struct inode_switch_wbs_context, work); 347 container_of(work, struct inode_switch_wbs_context, work);
338 struct inode *inode = isw->inode; 348 struct inode *inode = isw->inode;
349 struct backing_dev_info *bdi = inode_to_bdi(inode);
339 struct address_space *mapping = inode->i_mapping; 350 struct address_space *mapping = inode->i_mapping;
340 struct bdi_writeback *old_wb = inode->i_wb; 351 struct bdi_writeback *old_wb = inode->i_wb;
341 struct bdi_writeback *new_wb = isw->new_wb; 352 struct bdi_writeback *new_wb = isw->new_wb;
@@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
344 bool switched = false; 355 bool switched = false;
345 356
346 /* 357 /*
358 * If @inode switches cgwb membership while sync_inodes_sb() is
359 * being issued, sync_inodes_sb() might miss it. Synchronize.
360 */
361 down_read(&bdi->wb_switch_rwsem);
362
363 /*
347 * By the time control reaches here, RCU grace period has passed 364 * By the time control reaches here, RCU grace period has passed
348 * since I_WB_SWITCH assertion and all wb stat update transactions 365 * since I_WB_SWITCH assertion and all wb stat update transactions
349 * between unlocked_inode_to_wb_begin/end() are guaranteed to be 366 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
@@ -428,6 +445,8 @@ skip_switch:
428 spin_unlock(&new_wb->list_lock); 445 spin_unlock(&new_wb->list_lock);
429 spin_unlock(&old_wb->list_lock); 446 spin_unlock(&old_wb->list_lock);
430 447
448 up_read(&bdi->wb_switch_rwsem);
449
431 if (switched) { 450 if (switched) {
432 wb_wakeup(new_wb); 451 wb_wakeup(new_wb);
433 wb_put(old_wb); 452 wb_put(old_wb);
@@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
468 if (inode->i_state & I_WB_SWITCH) 487 if (inode->i_state & I_WB_SWITCH)
469 return; 488 return;
470 489
490 /*
491 * Avoid starting new switches while sync_inodes_sb() is in
492 * progress. Otherwise, if the down_write protected issue path
493 * blocks heavily, we might end up starting a large number of
494 * switches which will block on the rwsem.
495 */
496 if (!down_read_trylock(&bdi->wb_switch_rwsem))
497 return;
498
471 isw = kzalloc(sizeof(*isw), GFP_ATOMIC); 499 isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
472 if (!isw) 500 if (!isw)
473 return; 501 goto out_unlock;
474 502
475 /* find and pin the new wb */ 503 /* find and pin the new wb */
476 rcu_read_lock(); 504 rcu_read_lock();
@@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
504 * Let's continue after I_WB_SWITCH is guaranteed to be visible. 532 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
505 */ 533 */
506 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); 534 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
507 return; 535 goto out_unlock;
508 536
509out_free: 537out_free:
510 if (isw->new_wb) 538 if (isw->new_wb)
511 wb_put(isw->new_wb); 539 wb_put(isw->new_wb);
512 kfree(isw); 540 kfree(isw);
541out_unlock:
542 up_read(&bdi->wb_switch_rwsem);
513} 543}
514 544
515/** 545/**
@@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init);
887 917
888#else /* CONFIG_CGROUP_WRITEBACK */ 918#else /* CONFIG_CGROUP_WRITEBACK */
889 919
920static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
921static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
922
890static struct bdi_writeback * 923static struct bdi_writeback *
891locked_inode_to_wb_and_lock_list(struct inode *inode) 924locked_inode_to_wb_and_lock_list(struct inode *inode)
892 __releases(&inode->i_lock) 925 __releases(&inode->i_lock)
@@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb)
2413 return; 2446 return;
2414 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 2447 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2415 2448
2449 /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
2450 bdi_down_write_wb_switch_rwsem(bdi);
2416 bdi_split_work_to_wbs(bdi, &work, false); 2451 bdi_split_work_to_wbs(bdi, &work, false);
2417 wb_wait_for_completion(bdi, &done); 2452 wb_wait_for_completion(bdi, &done);
2453 bdi_up_write_wb_switch_rwsem(bdi);
2418 2454
2419 wait_sb_inodes(sb); 2455 wait_sb_inodes(sb);
2420} 2456}
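
The wb_switch_rwsem introduced above follows a classic pattern: switch work holds the lock shared, sync_inodes_sb() holds it exclusive while issuing writeback, and the switch-creation path uses a trylock so it backs off rather than queueing behind a slow sync. A pthread model of that shape (function and lock names mirror the hunks but the bodies are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t wb_switch_rwsem = PTHREAD_RWLOCK_INITIALIZER;

/* inode_switch_wbs(): back off instead of queueing behind a slow sync */
static void inode_switch_wbs(void)
{
	if (pthread_rwlock_tryrdlock(&wb_switch_rwsem) != 0)
		return;		/* sync in progress: skip this switch */
	/* ... allocate and queue the switch work ... */
	pthread_rwlock_unlock(&wb_switch_rwsem);
}

/* (the work fn itself takes the shared lock blocking, as in the hunk) */

/* sync_inodes_sb(): no switch may complete while writeback is issued */
static void sync_inodes_sb(void)
{
	pthread_rwlock_wrlock(&wb_switch_rwsem);
	/* ... bdi_split_work_to_wbs() + wait for completion ... */
	pthread_rwlock_unlock(&wb_switch_rwsem);
}

int main(void)
{
	inode_switch_wbs();
	sync_inodes_sb();
	puts("no switch raced with sync");
	return 0;
}
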
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index a5e516a40e7a..809c0f2f9942 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1742,7 +1742,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1742 req->in.h.nodeid = outarg->nodeid; 1742 req->in.h.nodeid = outarg->nodeid;
1743 req->in.numargs = 2; 1743 req->in.numargs = 2;
1744 req->in.argpages = 1; 1744 req->in.argpages = 1;
1745 req->page_descs[0].offset = offset;
1746 req->end = fuse_retrieve_end; 1745 req->end = fuse_retrieve_end;
1747 1746
1748 index = outarg->offset >> PAGE_SHIFT; 1747 index = outarg->offset >> PAGE_SHIFT;
@@ -1757,6 +1756,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1757 1756
1758 this_num = min_t(unsigned, num, PAGE_SIZE - offset); 1757 this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1759 req->pages[req->num_pages] = page; 1758 req->pages[req->num_pages] = page;
1759 req->page_descs[req->num_pages].offset = offset;
1760 req->page_descs[req->num_pages].length = this_num; 1760 req->page_descs[req->num_pages].length = this_num;
1761 req->num_pages++; 1761 req->num_pages++;
1762 1762
@@ -2077,8 +2077,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2077 2077
2078 ret = fuse_dev_do_write(fud, &cs, len); 2078 ret = fuse_dev_do_write(fud, &cs, len);
2079 2079
2080 pipe_lock(pipe);
2080 for (idx = 0; idx < nbuf; idx++) 2081 for (idx = 0; idx < nbuf; idx++)
2081 pipe_buf_release(pipe, &bufs[idx]); 2082 pipe_buf_release(pipe, &bufs[idx]);
2083 pipe_unlock(pipe);
2082 2084
2083out: 2085out:
2084 kvfree(bufs); 2086 kvfree(bufs);
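
The fuse_dev_splice_write() fix above brackets pipe_buf_release() with pipe_lock()/pipe_unlock(), since releasing pipe buffers without the pipe mutex races with concurrent users of the pipe. A toy pthread rendering of that discipline (stand-in types, not the real pipe API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pipe_mutex = PTHREAD_MUTEX_INITIALIZER;

struct pipe_buffer { int held; };

static void pipe_buf_release(struct pipe_buffer *buf)
{
	buf->held = 0;	/* must only run while pipe_mutex is held */
}

int main(void)
{
	struct pipe_buffer bufs[4] = { {1}, {1}, {1}, {1} };

	pthread_mutex_lock(&pipe_mutex);	/* pipe_lock(pipe) */
	for (int i = 0; i < 4; i++)
		pipe_buf_release(&bufs[i]);
	pthread_mutex_unlock(&pipe_mutex);	/* pipe_unlock(pipe) */
	puts("buffers released under the pipe lock");
	return 0;
}
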
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ffaffe18352a..a59c16bd90ac 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1782,7 +1782,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
1782 spin_unlock(&fc->lock); 1782 spin_unlock(&fc->lock);
1783 1783
1784 dec_wb_stat(&bdi->wb, WB_WRITEBACK); 1784 dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1785 dec_node_page_state(page, NR_WRITEBACK_TEMP); 1785 dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
1786 wb_writeout_inc(&bdi->wb); 1786 wb_writeout_inc(&bdi->wb);
1787 fuse_writepage_free(fc, new_req); 1787 fuse_writepage_free(fc, new_req);
1788 fuse_request_free(new_req); 1788 fuse_request_free(new_req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 76baaa6be393..c2d4099429be 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -628,6 +628,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
628 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 628 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
629 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 629 fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
630 fc->user_ns = get_user_ns(user_ns); 630 fc->user_ns = get_user_ns(user_ns);
631 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
631} 632}
632EXPORT_SYMBOL_GPL(fuse_conn_init); 633EXPORT_SYMBOL_GPL(fuse_conn_init);
633 634
@@ -1162,7 +1163,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1162 fc->user_id = d.user_id; 1163 fc->user_id = d.user_id;
1163 fc->group_id = d.group_id; 1164 fc->group_id = d.group_id;
1164 fc->max_read = max_t(unsigned, 4096, d.max_read); 1165 fc->max_read = max_t(unsigned, 4096, d.max_read);
1165 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
1166 1166
1167 /* Used by get_root_inode() */ 1167 /* Used by get_root_inode() */
1168 sb->s_fs_info = fc; 1168 sb->s_fs_info = fc;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f15b4c57c4bd..78510ab91835 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,7 +28,6 @@
28#include "util.h" 28#include "util.h"
29#include "trans.h" 29#include "trans.h"
30#include "dir.h" 30#include "dir.h"
31#include "lops.h"
32 31
33struct workqueue_struct *gfs2_freeze_wq; 32struct workqueue_struct *gfs2_freeze_wq;
34 33
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5bfaf381921a..b8830fda51e8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
733 lh->lh_crc = cpu_to_be32(crc); 733 lh->lh_crc = cpu_to_be32(crc);
734 734
735 gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr); 735 gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
736 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags); 736 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
737 log_flush_wait(sdp); 737 log_flush_wait(sdp);
738} 738}
739 739
@@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
810 810
811 gfs2_ordered_write(sdp); 811 gfs2_ordered_write(sdp);
812 lops_before_commit(sdp, tr); 812 lops_before_commit(sdp, tr);
813 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE); 813 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
814 814
815 if (sdp->sd_log_head != sdp->sd_log_flush_head) { 815 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
816 log_flush_wait(sdp); 816 log_flush_wait(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 94dcab655bc0..2295042bc625 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,9 +17,7 @@
17#include <linux/bio.h> 17#include <linux/bio.h>
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/list_sort.h> 19#include <linux/list_sort.h>
20#include <linux/blkdev.h>
21 20
22#include "bmap.h"
23#include "dir.h" 21#include "dir.h"
24#include "gfs2.h" 22#include "gfs2.h"
25#include "incore.h" 23#include "incore.h"
@@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
195/** 193/**
196 * gfs2_end_log_write - end of i/o to the log 194 * gfs2_end_log_write - end of i/o to the log
197 * @bio: The bio 195 * @bio: The bio
196 * @error: Status of i/o request
198 * 197 *
199 * Each bio_vec contains either data from the pagecache or data 198 * Each bio_vec contains either data from the pagecache or data
200 * relating to the log itself. Here we iterate over the bio_vec 199 * relating to the log itself. Here we iterate over the bio_vec
@@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio)
231/** 230/**
232 * gfs2_log_submit_bio - Submit any pending log bio 231 * gfs2_log_submit_bio - Submit any pending log bio
233 * @biop: Address of the bio pointer 232 * @biop: Address of the bio pointer
234 * @opf: REQ_OP | op_flags 233 * @op: REQ_OP
234 * @op_flags: req_flag_bits
235 * 235 *
236 * Submit any pending part-built or full bio to the block device. If 236 * Submit any pending part-built or full bio to the block device. If
237 * there is no pending bio, then this is a no-op. 237 * there is no pending bio, then this is a no-op.
238 */ 238 */
239 239
240void gfs2_log_submit_bio(struct bio **biop, int opf) 240void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags)
241{ 241{
242 struct bio *bio = *biop; 242 struct bio *bio = *biop;
243 if (bio) { 243 if (bio) {
244 struct gfs2_sbd *sdp = bio->bi_private; 244 struct gfs2_sbd *sdp = bio->bi_private;
245 atomic_inc(&sdp->sd_log_in_flight); 245 atomic_inc(&sdp->sd_log_in_flight);
246 bio->bi_opf = opf; 246 bio_set_op_attrs(bio, op, op_flags);
247 submit_bio(bio); 247 submit_bio(bio);
248 *biop = NULL; 248 *biop = NULL;
249 } 249 }
@@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
304 nblk >>= sdp->sd_fsb2bb_shift; 304 nblk >>= sdp->sd_fsb2bb_shift;
305 if (blkno == nblk && !flush) 305 if (blkno == nblk && !flush)
306 return bio; 306 return bio;
307 gfs2_log_submit_bio(biop, op); 307 gfs2_log_submit_bio(biop, op, 0);
308 } 308 }
309 309
310 *biop = gfs2_log_alloc_bio(sdp, blkno, end_io); 310 *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
@@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
375 gfs2_log_bmap(sdp)); 375 gfs2_log_bmap(sdp));
376} 376}
377 377
378/**
379 * gfs2_end_log_read - end I/O callback for reads from the log
380 * @bio: The bio
381 *
382 * Simply unlock the pages in the bio. The main thread will wait on them and
383 * process them in order as necessary.
384 */
385
386static void gfs2_end_log_read(struct bio *bio)
387{
388 struct page *page;
389 struct bio_vec *bvec;
390 int i;
391
392 bio_for_each_segment_all(bvec, bio, i) {
393 page = bvec->bv_page;
394 if (bio->bi_status) {
395 int err = blk_status_to_errno(bio->bi_status);
396
397 SetPageError(page);
398 mapping_set_error(page->mapping, err);
399 }
400 unlock_page(page);
401 }
402
403 bio_put(bio);
404}
405
406/**
407 * gfs2_jhead_pg_srch - Look for the journal head in a given page.
408 * @jd: The journal descriptor
409 * @page: The page to look in
410 *
411 * Returns: 1 if found, 0 otherwise.
412 */
413
414static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
415 struct gfs2_log_header_host *head,
416 struct page *page)
417{
418 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
419 struct gfs2_log_header_host uninitialized_var(lh);
420 void *kaddr = kmap_atomic(page);
421 unsigned int offset;
422 bool ret = false;
423
424 for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
425 if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
426 if (lh.lh_sequence > head->lh_sequence)
427 *head = lh;
428 else {
429 ret = true;
430 break;
431 }
432 }
433 }
434 kunmap_atomic(kaddr);
435 return ret;
436}
437
438/**
439 * gfs2_jhead_process_page - Search/cleanup a page
440 * @jd: The journal descriptor
441 * @index: Index of the page to look into
442 * @done: If set, perform only cleanup, else search and set if found.
443 *
444 * Find the page with 'index' in the journal's mapping. Search the page for
445 * the journal head if requested (cleanup == false). Release refs on the
446 * page so the page cache can reclaim it (put_page() twice). We grabbed a
447 * reference on this page two times, first when we did a find_or_create_page()
448 * to obtain the page to add it to the bio and second when we do a
449 * find_get_page() here to get the page to wait on while I/O on it is being
450 * completed.
451 * This function is also used to free up a page we might've grabbed but not
452 * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
453 * submitted the I/O, but we already found the jhead so we only need to drop
454 * our references to the page.
455 */
456
457static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
458 struct gfs2_log_header_host *head,
459 bool *done)
460{
461 struct page *page;
462
463 page = find_get_page(jd->jd_inode->i_mapping, index);
464 wait_on_page_locked(page);
465
466 if (PageError(page))
467 *done = true;
468
469 if (!*done)
470 *done = gfs2_jhead_pg_srch(jd, head, page);
471
472 put_page(page); /* Once for find_get_page */
473 put_page(page); /* Once more for find_or_create_page */
474}
475
476/**
477 * gfs2_find_jhead - find the head of a log
478 * @jd: The journal descriptor
479 * @head: The log descriptor for the head of the log is returned here
480 *
481 * Do a search of a journal by reading it in large chunks using bios and find
482 * the valid log entry with the highest sequence number. (i.e. the log head)
483 *
484 * Returns: 0 on success, errno otherwise
485 */
486
487int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
488{
489 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
490 struct address_space *mapping = jd->jd_inode->i_mapping;
491 struct gfs2_journal_extent *je;
492 u32 block, read_idx = 0, submit_idx = 0, index = 0;
493 int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
494 int blocks_per_page = 1 << shift, sz, ret = 0;
495 struct bio *bio = NULL;
496 struct page *page;
497 bool done = false;
498 errseq_t since;
499
500 memset(head, 0, sizeof(*head));
501 if (list_empty(&jd->extent_list))
502 gfs2_map_journal_extents(sdp, jd);
503
504 since = filemap_sample_wb_err(mapping);
505 list_for_each_entry(je, &jd->extent_list, list) {
506 for (block = 0; block < je->blocks; block += blocks_per_page) {
507 index = (je->lblock + block) >> shift;
508
509 page = find_or_create_page(mapping, index, GFP_NOFS);
510 if (!page) {
511 ret = -ENOMEM;
512 done = true;
513 goto out;
514 }
515
516 if (bio) {
517 sz = bio_add_page(bio, page, PAGE_SIZE, 0);
518 if (sz == PAGE_SIZE)
519 goto page_added;
520 submit_idx = index;
521 submit_bio(bio);
522 bio = NULL;
523 }
524
525 bio = gfs2_log_alloc_bio(sdp,
526 je->dblock + (index << shift),
527 gfs2_end_log_read);
528 bio->bi_opf = REQ_OP_READ;
529 sz = bio_add_page(bio, page, PAGE_SIZE, 0);
530 gfs2_assert_warn(sdp, sz == PAGE_SIZE);
531
532page_added:
533 if (submit_idx <= read_idx + BIO_MAX_PAGES) {
534 /* Keep at least one bio in flight */
535 continue;
536 }
537
538 gfs2_jhead_process_page(jd, read_idx++, head, &done);
539 if (done)
540 goto out; /* found */
541 }
542 }
543
544out:
545 if (bio)
546 submit_bio(bio);
547 while (read_idx <= index)
548 gfs2_jhead_process_page(jd, read_idx++, head, &done);
549
550 if (!ret)
551 ret = filemap_check_wb_err(mapping, since);
552
553 return ret;
554}
555
556static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, 378static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
557 u32 ld_length, u32 ld_data1) 379 u32 ld_length, u32 ld_data1)
558{ 380{
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 331160fc568b..711c4d89c063 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -30,10 +30,8 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
30extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page, 30extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
31 unsigned size, unsigned offset, u64 blkno); 31 unsigned size, unsigned offset, u64 blkno);
32extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 32extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
33extern void gfs2_log_submit_bio(struct bio **biop, int opf); 33extern void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags);
34extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); 34extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
35extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
36 struct gfs2_log_header_host *head);
37 35
38static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 36static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
39{ 37{
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1179763f6370..b041cb8ae383 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -41,7 +41,6 @@
41#include "dir.h" 41#include "dir.h"
42#include "meta_io.h" 42#include "meta_io.h"
43#include "trace_gfs2.h" 43#include "trace_gfs2.h"
44#include "lops.h"
45 44
46#define DO 0 45#define DO 0
47#define UNDO 1 46#define UNDO 1
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 7389e445a7a7..2dac43065382 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -182,6 +182,129 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
182} 182}
183 183
184/** 184/**
185 * find_good_lh - find a good log header
186 * @jd: the journal
187 * @blk: the segment to start searching from
 188 * @head: the log header to fill in
 189 * (there is no backward search; the scan wraps past the journal end)
190 *
191 * Call get_log_header() to get a log header for a segment, but if the
 192 * segment is bad, scan forward until we find a good one.
193 *
194 * Returns: errno
195 */
196
197static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
198 struct gfs2_log_header_host *head)
199{
200 unsigned int orig_blk = *blk;
201 int error;
202
203 for (;;) {
204 error = get_log_header(jd, *blk, head);
205 if (error <= 0)
206 return error;
207
208 if (++*blk == jd->jd_blocks)
209 *blk = 0;
210
211 if (*blk == orig_blk) {
212 gfs2_consist_inode(GFS2_I(jd->jd_inode));
213 return -EIO;
214 }
215 }
216}
217
218/**
219 * jhead_scan - make sure we've found the head of the log
220 * @jd: the journal
221 * @head: this is filled in with the log descriptor of the head
222 *
223 * At this point, seg and lh should be either the head of the log or just
224 * before. Scan forward until we find the head.
225 *
226 * Returns: errno
227 */
228
229static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
230{
231 unsigned int blk = head->lh_blkno;
232 struct gfs2_log_header_host lh;
233 int error;
234
235 for (;;) {
236 if (++blk == jd->jd_blocks)
237 blk = 0;
238
239 error = get_log_header(jd, blk, &lh);
240 if (error < 0)
241 return error;
242 if (error == 1)
243 continue;
244
245 if (lh.lh_sequence == head->lh_sequence) {
246 gfs2_consist_inode(GFS2_I(jd->jd_inode));
247 return -EIO;
248 }
249 if (lh.lh_sequence < head->lh_sequence)
250 break;
251
252 *head = lh;
253 }
254
255 return 0;
256}
257
258/**
259 * gfs2_find_jhead - find the head of a log
260 * @jd: the journal
261 * @head: the log descriptor for the head of the log is returned here
262 *
263 * Do a binary search of a journal and find the valid log entry with the
264 * highest sequence number. (i.e. the log head)
265 *
266 * Returns: errno
267 */
268
269int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
270{
271 struct gfs2_log_header_host lh_1, lh_m;
272 u32 blk_1, blk_2, blk_m;
273 int error;
274
275 blk_1 = 0;
276 blk_2 = jd->jd_blocks - 1;
277
278 for (;;) {
279 blk_m = (blk_1 + blk_2) / 2;
280
281 error = find_good_lh(jd, &blk_1, &lh_1);
282 if (error)
283 return error;
284
285 error = find_good_lh(jd, &blk_m, &lh_m);
286 if (error)
287 return error;
288
289 if (blk_1 == blk_m || blk_m == blk_2)
290 break;
291
292 if (lh_1.lh_sequence <= lh_m.lh_sequence)
293 blk_1 = blk_m;
294 else
295 blk_2 = blk_m;
296 }
297
298 error = jhead_scan(jd, &lh_1);
299 if (error)
300 return error;
301
302 *head = lh_1;
303
304 return error;
305}
306
307/**
185 * foreach_descriptor - go through the active part of the log 308 * foreach_descriptor - go through the active part of the log
186 * @jd: the journal 309 * @jd: the journal
187 * @start: the first log header in the active region 310 * @start: the first log header in the active region
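
The restored gfs2_find_jhead() exploits the journal's structure: sequence numbers increase monotonically from some wrap point in the circular log, so probing the midpoint and comparing sequences halves the search space, and jhead_scan() finishes with a short linear walk. A toy version over an int array (the real code reads log headers from disk and skips bad segments via find_good_lh()):

#include <stdio.h>

static unsigned int find_jhead(const unsigned int *seq, unsigned int blocks)
{
	unsigned int blk_1 = 0, blk_2 = blocks - 1, blk_m;

	for (;;) {
		blk_m = (blk_1 + blk_2) / 2;
		if (blk_1 == blk_m || blk_m == blk_2)
			break;
		if (seq[blk_1] <= seq[blk_m])
			blk_1 = blk_m;	/* still ascending: head at or after mid */
		else
			blk_2 = blk_m;	/* wrapped: head lies before mid */
	}
	/* jhead_scan() equivalent: walk forward while sequences keep rising */
	while (blk_1 + 1 < blocks && seq[blk_1 + 1] > seq[blk_1])
		blk_1++;
	return blk_1;
}

int main(void)
{
	/* log wrapped after writing sequence 9 at block 3 */
	unsigned int seq[] = { 6, 7, 8, 9, 4, 5 };

	printf("head at block %u\n", find_jhead(seq, 6));	/* prints 3 */
	return 0;
}
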
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 99575ab81202..11d81248be85 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -27,6 +27,8 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
27extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); 27extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
28extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); 28extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
29 29
30extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
31 struct gfs2_log_header_host *head);
30extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); 32extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
31extern void gfs2_recover_func(struct work_struct *work); 33extern void gfs2_recover_func(struct work_struct *work);
32extern int __get_log_header(struct gfs2_sbd *sdp, 34extern int __get_log_header(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 831d7cb5a49c..17a8d3b43990 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1780,9 +1780,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1780 goto next_iter; 1780 goto next_iter;
1781 } 1781 }
1782 if (ret == -E2BIG) { 1782 if (ret == -E2BIG) {
1783 n += rbm->bii - initial_bii;
1784 rbm->bii = 0; 1783 rbm->bii = 0;
1785 rbm->offset = 0; 1784 rbm->offset = 0;
1785 n += (rbm->bii - initial_bii);
1786 goto res_covered_end_of_rgrp; 1786 goto res_covered_end_of_rgrp;
1787 } 1787 }
1788 return ret; 1788 return ret;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d4b11c903971..ca71163ff7cf 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -45,7 +45,6 @@
45#include "util.h" 45#include "util.h"
46#include "sys.h" 46#include "sys.h"
47#include "xattr.h" 47#include "xattr.h"
48#include "lops.h"
49 48
50#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) 49#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
51 50
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2fcea5f8225..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
383 * truncation is indicated by end of range being LLONG_MAX 383 * truncation is indicated by end of range being LLONG_MAX
384 * In this case, we first scan the range and release found pages. 384 * In this case, we first scan the range and release found pages.
385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv 385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
 386 * maps and global counts. 386 * maps and global counts. Page faults cannot race with truncation
387 * in this routine. hugetlb_no_page() prevents page faults in the
388 * truncated range. It checks i_size before allocation, and again after
389 * with the page table lock for the page held. The same lock must be
390 * acquired to unmap a page.
387 * hole punch is indicated if end is not LLONG_MAX 391 * hole punch is indicated if end is not LLONG_MAX
388 * In the hole punch case we scan the range and release found pages. 392 * In the hole punch case we scan the range and release found pages.
389 * Only when releasing a page is the associated region/reserv map 393 * Only when releasing a page is the associated region/reserv map
390 * deleted. The region/reserv map for ranges without associated 394 * deleted. The region/reserv map for ranges without associated
391 * pages are not modified. 395 * pages are not modified. Page faults can race with hole punch.
 392 * 396 * A racing fault is indicated by finding a mapped page.
393 * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
394 * races with page faults.
395 *
396 * Note: If the passed end of range value is beyond the end of file, but 397 * Note: If the passed end of range value is beyond the end of file, but
397 * not LLONG_MAX this routine still performs a hole punch operation. 398 * not LLONG_MAX this routine still performs a hole punch operation.
398 */ 399 */
@@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
422 423
423 for (i = 0; i < pagevec_count(&pvec); ++i) { 424 for (i = 0; i < pagevec_count(&pvec); ++i) {
424 struct page *page = pvec.pages[i]; 425 struct page *page = pvec.pages[i];
426 u32 hash;
425 427
426 index = page->index; 428 index = page->index;
429 hash = hugetlb_fault_mutex_hash(h, current->mm,
430 &pseudo_vma,
431 mapping, index, 0);
432 mutex_lock(&hugetlb_fault_mutex_table[hash]);
433
427 /* 434 /*
428 * A mapped page is impossible as callers should unmap 435 * If page is mapped, it was faulted in after being
429 * all references before calling. And, i_mmap_rwsem 436 * unmapped in caller. Unmap (again) now after taking
430 * prevents the creation of additional mappings. 437 * the fault mutex. The mutex will prevent faults
438 * until we finish removing the page.
439 *
440 * This race can only happen in the hole punch case.
441 * Getting here in a truncate operation is a bug.
431 */ 442 */
432 VM_BUG_ON(page_mapped(page)); 443 if (unlikely(page_mapped(page))) {
444 BUG_ON(truncate_op);
445
446 i_mmap_lock_write(mapping);
447 hugetlb_vmdelete_list(&mapping->i_mmap,
448 index * pages_per_huge_page(h),
449 (index + 1) * pages_per_huge_page(h));
450 i_mmap_unlock_write(mapping);
451 }
433 452
434 lock_page(page); 453 lock_page(page);
435 /* 454 /*
@@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
451 } 470 }
452 471
453 unlock_page(page); 472 unlock_page(page);
473 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
454 } 474 }
455 huge_pagevec_release(&pvec); 475 huge_pagevec_release(&pvec);
456 cond_resched(); 476 cond_resched();
@@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
462 482
463static void hugetlbfs_evict_inode(struct inode *inode) 483static void hugetlbfs_evict_inode(struct inode *inode)
464{ 484{
465 struct address_space *mapping = inode->i_mapping;
466 struct resv_map *resv_map; 485 struct resv_map *resv_map;
467 486
468 /*
469 * The vfs layer guarantees that there are no other users of this
470 * inode. Therefore, it would be safe to call remove_inode_hugepages
471 * without holding i_mmap_rwsem. We acquire and hold here to be
472 * consistent with other callers. Since there will be no contention
473 * on the semaphore, overhead is negligible.
474 */
475 i_mmap_lock_write(mapping);
476 remove_inode_hugepages(inode, 0, LLONG_MAX); 487 remove_inode_hugepages(inode, 0, LLONG_MAX);
477 i_mmap_unlock_write(mapping);
478
479 resv_map = (struct resv_map *)inode->i_mapping->private_data; 488 resv_map = (struct resv_map *)inode->i_mapping->private_data;
480 /* root inode doesn't have the resv_map, so we should check it */ 489 /* root inode doesn't have the resv_map, so we should check it */
481 if (resv_map) 490 if (resv_map)
@@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
496 i_mmap_lock_write(mapping); 505 i_mmap_lock_write(mapping);
497 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) 506 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
498 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); 507 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
499 remove_inode_hugepages(inode, offset, LLONG_MAX);
500 i_mmap_unlock_write(mapping); 508 i_mmap_unlock_write(mapping);
509 remove_inode_hugepages(inode, offset, LLONG_MAX);
501 return 0; 510 return 0;
502} 511}
503 512
@@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
531 hugetlb_vmdelete_list(&mapping->i_mmap, 540 hugetlb_vmdelete_list(&mapping->i_mmap,
532 hole_start >> PAGE_SHIFT, 541 hole_start >> PAGE_SHIFT,
533 hole_end >> PAGE_SHIFT); 542 hole_end >> PAGE_SHIFT);
534 remove_inode_hugepages(inode, hole_start, hole_end);
535 i_mmap_unlock_write(mapping); 543 i_mmap_unlock_write(mapping);
544 remove_inode_hugepages(inode, hole_start, hole_end);
536 inode_unlock(inode); 545 inode_unlock(inode);
537 } 546 }
538 547
@@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
615 /* addr is the offset within the file (zero based) */ 624 /* addr is the offset within the file (zero based) */
616 addr = index * hpage_size; 625 addr = index * hpage_size;
617 626
 618 /* 627 /* fault mutex taken here; serializes against fault path and hole punch */
619 * fault mutex taken here, protects against fault path
620 * and hole punch. inode_lock previously taken protects
621 * against truncation.
622 */
623 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, 628 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
624 index, addr); 629 index, addr);
625 mutex_lock(&hugetlb_fault_mutex_table[hash]); 630 mutex_lock(&hugetlb_fault_mutex_table[hash]);
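
The remove_inode_hugepages() rework above replaces a caller-held i_mmap_rwsem with the hashed fault mutex taken per page index; a page found mapped is a fault that raced with hole punch, and it is unmapped again under the mutex before removal. A small pthread model of that per-index serialization (toy hash, illustrative names, not the hugetlb API):

#include <pthread.h>
#include <stdio.h>

#define HASH_SZ 8

static pthread_mutex_t fault_mutex_table[HASH_SZ];

static unsigned int fault_mutex_hash(unsigned long index)
{
	return index % HASH_SZ;
}

static void remove_page(unsigned long index, int *mapped)
{
	unsigned int hash = fault_mutex_hash(index);

	pthread_mutex_lock(&fault_mutex_table[hash]);
	if (*mapped)		/* faulted in again after the caller's unmap */
		*mapped = 0;	/* hugetlb_vmdelete_list() stand-in */
	/* ... remove the page; faults on this index are locked out ... */
	pthread_mutex_unlock(&fault_mutex_table[hash]);
}

int main(void)
{
	int mapped = 1;

	for (int i = 0; i < HASH_SZ; i++)
		pthread_mutex_init(&fault_mutex_table[i], NULL);

	remove_page(42, &mapped);
	printf("mapped after removal: %d\n", mapped);	/* prints 0 */
	return 0;
}
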
diff --git a/fs/inode.c b/fs/inode.c
index 0cd47fe0dbe5..73432e64f874 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -730,11 +730,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
730 return LRU_REMOVED; 730 return LRU_REMOVED;
731 } 731 }
732 732
733 /* 733 /* recently referenced inodes get one more pass */
734 * Recently referenced inodes and inodes with many attached pages 734 if (inode->i_state & I_REFERENCED) {
735 * get one more pass.
736 */
737 if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
738 inode->i_state &= ~I_REFERENCED; 735 inode->i_state &= ~I_REFERENCED;
739 spin_unlock(&inode->i_lock); 736 spin_unlock(&inode->i_lock);
740 return LRU_ROTATE; 737 return LRU_ROTATE;
diff --git a/fs/iomap.c b/fs/iomap.c
index a3088fae567b..897c60215dd1 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -116,6 +116,12 @@ iomap_page_create(struct inode *inode, struct page *page)
116 atomic_set(&iop->read_count, 0); 116 atomic_set(&iop->read_count, 0);
117 atomic_set(&iop->write_count, 0); 117 atomic_set(&iop->write_count, 0);
118 bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); 118 bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
119
120 /*
121 * migrate_page_move_mapping() assumes that pages with private data have
122 * their count elevated by 1.
123 */
124 get_page(page);
119 set_page_private(page, (unsigned long)iop); 125 set_page_private(page, (unsigned long)iop);
120 SetPagePrivate(page); 126 SetPagePrivate(page);
121 return iop; 127 return iop;
@@ -132,6 +138,7 @@ iomap_page_release(struct page *page)
132 WARN_ON_ONCE(atomic_read(&iop->write_count)); 138 WARN_ON_ONCE(atomic_read(&iop->write_count));
133 ClearPagePrivate(page); 139 ClearPagePrivate(page);
134 set_page_private(page, 0); 140 set_page_private(page, 0);
141 put_page(page);
135 kfree(iop); 142 kfree(iop);
136} 143}
137 144
@@ -569,8 +576,10 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage,
569 576
570 if (page_has_private(page)) { 577 if (page_has_private(page)) {
571 ClearPagePrivate(page); 578 ClearPagePrivate(page);
579 get_page(newpage);
572 set_page_private(newpage, page_private(page)); 580 set_page_private(newpage, page_private(page));
573 set_page_private(page, 0); 581 set_page_private(page, 0);
582 put_page(page);
574 SetPagePrivate(newpage); 583 SetPagePrivate(newpage);
575 } 584 }
576 585
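
The two iomap hunks above pair reference counting with PagePrivate: attaching private data takes a page reference, detaching drops it, and migration moves both the pointer and the reference to the new page, which is what migrate_page_move_mapping() expects. The pairing in miniature (plain struct, not the kernel's page API):

#include <stdio.h>

struct page { int refcount; void *private; };

static void attach_private(struct page *p, void *data)
{
	p->refcount++;		/* get_page() */
	p->private = data;	/* set_page_private() + SetPagePrivate() */
}

static void detach_private(struct page *p)
{
	p->private = NULL;
	p->refcount--;		/* put_page() */
}

static void migrate_private(struct page *oldp, struct page *newp)
{
	newp->refcount++;	/* get_page(newpage) */
	newp->private = oldp->private;
	oldp->private = NULL;
	oldp->refcount--;	/* put_page(page) */
}

int main(void)
{
	struct page a = { .refcount = 1 }, b = { .refcount = 1 };
	int iop;	/* dummy payload standing in for struct iomap_page */

	attach_private(&a, &iop);
	migrate_private(&a, &b);
	detach_private(&b);
	printf("refcounts: %d %d\n", a.refcount, b.refcount);	/* 1 1 */
	return 0;
}
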
@@ -1804,6 +1813,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1804 loff_t pos = iocb->ki_pos, start = pos; 1813 loff_t pos = iocb->ki_pos, start = pos;
1805 loff_t end = iocb->ki_pos + count - 1, ret = 0; 1814 loff_t end = iocb->ki_pos + count - 1, ret = 0;
1806 unsigned int flags = IOMAP_DIRECT; 1815 unsigned int flags = IOMAP_DIRECT;
1816 bool wait_for_completion = is_sync_kiocb(iocb);
1807 struct blk_plug plug; 1817 struct blk_plug plug;
1808 struct iomap_dio *dio; 1818 struct iomap_dio *dio;
1809 1819
@@ -1823,7 +1833,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1823 dio->end_io = end_io; 1833 dio->end_io = end_io;
1824 dio->error = 0; 1834 dio->error = 0;
1825 dio->flags = 0; 1835 dio->flags = 0;
1826 dio->wait_for_completion = is_sync_kiocb(iocb);
1827 1836
1828 dio->submit.iter = iter; 1837 dio->submit.iter = iter;
1829 dio->submit.waiter = current; 1838 dio->submit.waiter = current;
@@ -1878,7 +1887,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1878 dio_warn_stale_pagecache(iocb->ki_filp); 1887 dio_warn_stale_pagecache(iocb->ki_filp);
1879 ret = 0; 1888 ret = 0;
1880 1889
1881 if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && 1890 if (iov_iter_rw(iter) == WRITE && !wait_for_completion &&
1882 !inode->i_sb->s_dio_done_wq) { 1891 !inode->i_sb->s_dio_done_wq) {
1883 ret = sb_init_dio_done_wq(inode->i_sb); 1892 ret = sb_init_dio_done_wq(inode->i_sb);
1884 if (ret < 0) 1893 if (ret < 0)
@@ -1894,7 +1903,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1894 if (ret <= 0) { 1903 if (ret <= 0) {
1895 /* magic error code to fall back to buffered I/O */ 1904 /* magic error code to fall back to buffered I/O */
1896 if (ret == -ENOTBLK) { 1905 if (ret == -ENOTBLK) {
1897 dio->wait_for_completion = true; 1906 wait_for_completion = true;
1898 ret = 0; 1907 ret = 0;
1899 } 1908 }
1900 break; 1909 break;
@@ -1916,8 +1925,24 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1916 if (dio->flags & IOMAP_DIO_WRITE_FUA) 1925 if (dio->flags & IOMAP_DIO_WRITE_FUA)
1917 dio->flags &= ~IOMAP_DIO_NEED_SYNC; 1926 dio->flags &= ~IOMAP_DIO_NEED_SYNC;
1918 1927
1928 /*
1929 * We are about to drop our additional submission reference, which
 1930 * might be the last reference to the dio. There are three
1931 * different ways we can progress here:
1932 *
1933 * (a) If this is the last reference we will always complete and free
1934 * the dio ourselves.
1935 * (b) If this is not the last reference, and we serve an asynchronous
 1937 * iocb, we must never touch the dio after the decrement; the
1937 * I/O completion handler will complete and free it.
1938 * (c) If this is not the last reference, but we serve a synchronous
1939 * iocb, the I/O completion handler will wake us up on the drop
1940 * of the final reference, and we will complete and free it here
1941 * after we got woken by the I/O completion handler.
1942 */
1943 dio->wait_for_completion = wait_for_completion;
1919 if (!atomic_dec_and_test(&dio->ref)) { 1944 if (!atomic_dec_and_test(&dio->ref)) {
1920 if (!dio->wait_for_completion) 1945 if (!wait_for_completion)
1921 return -EIOCBQUEUED; 1946 return -EIOCBQUEUED;
1922 1947
1923 for (;;) { 1948 for (;;) {
@@ -1934,9 +1959,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1934 __set_current_state(TASK_RUNNING); 1959 __set_current_state(TASK_RUNNING);
1935 } 1960 }
1936 1961
1937 ret = iomap_dio_complete(dio); 1962 return iomap_dio_complete(dio);
1938
1939 return ret;
1940 1963
1941out_free_dio: 1964out_free_dio:
1942 kfree(dio); 1965 kfree(dio);
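
The iomap_dio_rw() change above fixes a use-after-free by the book: once the submitter drops its extra reference, an async completion may free the dio at any moment, so the wait decision is copied into a stack variable first and the struct is never touched after the decrement unless we know we held the last reference. The pattern, reduced to a standalone model (names mirror the hunk; the refcounting is a plain C11 atomic, not the kernel's):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct dio {
	atomic_int ref;
	bool wait_for_completion;
};

static void dio_put(struct dio *dio)
{
	if (atomic_fetch_sub(&dio->ref, 1) == 1)
		free(dio);	/* last ref: whoever drops it frees it */
}

int main(void)
{
	struct dio *dio = malloc(sizeof(*dio));

	atomic_init(&dio->ref, 2);	/* submitter + in-flight bio */
	dio->wait_for_completion = false;

	bool wait = dio->wait_for_completion;	/* copy BEFORE the drop */
	dio_put(dio);		/* submitter's ref; dio may now vanish */
	if (!wait)		/* safe: reads the local, not the dio */
		puts("-EIOCBQUEUED: completion owns the dio now");
	dio_put(dio);		/* models the completion handler's final put */
	return 0;
}
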
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 46d691ba04bc..45b2322e092d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,15 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
133 struct file *file_out, loff_t pos_out, 133 struct file *file_out, loff_t pos_out,
134 size_t count, unsigned int flags) 134 size_t count, unsigned int flags)
135{ 135{
136 ssize_t ret;
137
138 if (file_inode(file_in) == file_inode(file_out)) 136 if (file_inode(file_in) == file_inode(file_out))
139 return -EINVAL; 137 return -EINVAL;
140retry: 138 return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
141 ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
142 if (ret == -EAGAIN)
143 goto retry;
144 return ret;
145} 139}
146 140
147static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) 141static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 3f23b6840547..bf34ddaa2ad7 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -44,6 +44,7 @@
44#include <linux/keyctl.h> 44#include <linux/keyctl.h>
45#include <linux/key-type.h> 45#include <linux/key-type.h>
46#include <keys/user-type.h> 46#include <keys/user-type.h>
47#include <keys/request_key_auth-type.h>
47#include <linux/module.h> 48#include <linux/module.h>
48 49
49#include "internal.h" 50#include "internal.h"
@@ -59,7 +60,7 @@ static struct key_type key_type_id_resolver_legacy;
59struct idmap_legacy_upcalldata { 60struct idmap_legacy_upcalldata {
60 struct rpc_pipe_msg pipe_msg; 61 struct rpc_pipe_msg pipe_msg;
61 struct idmap_msg idmap_msg; 62 struct idmap_msg idmap_msg;
62 struct key_construction *key_cons; 63 struct key *authkey;
63 struct idmap *idmap; 64 struct idmap *idmap;
64}; 65};
65 66
@@ -384,7 +385,7 @@ static const match_table_t nfs_idmap_tokens = {
384 { Opt_find_err, NULL } 385 { Opt_find_err, NULL }
385}; 386};
386 387
387static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); 388static int nfs_idmap_legacy_upcall(struct key *, void *);
388static ssize_t idmap_pipe_downcall(struct file *, const char __user *, 389static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
389 size_t); 390 size_t);
390static void idmap_release_pipe(struct inode *); 391static void idmap_release_pipe(struct inode *);
@@ -549,11 +550,12 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
549static void 550static void
550nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) 551nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
551{ 552{
552 struct key_construction *cons = idmap->idmap_upcall_data->key_cons; 553 struct key *authkey = idmap->idmap_upcall_data->authkey;
553 554
554 kfree(idmap->idmap_upcall_data); 555 kfree(idmap->idmap_upcall_data);
555 idmap->idmap_upcall_data = NULL; 556 idmap->idmap_upcall_data = NULL;
556 complete_request_key(cons, ret); 557 complete_request_key(authkey, ret);
558 key_put(authkey);
557} 559}
558 560
559static void 561static void
@@ -563,15 +565,14 @@ nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
563 nfs_idmap_complete_pipe_upcall_locked(idmap, ret); 565 nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
564} 566}
565 567
566static int nfs_idmap_legacy_upcall(struct key_construction *cons, 568static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
567 const char *op,
568 void *aux)
569{ 569{
570 struct idmap_legacy_upcalldata *data; 570 struct idmap_legacy_upcalldata *data;
571 struct request_key_auth *rka = get_request_key_auth(authkey);
571 struct rpc_pipe_msg *msg; 572 struct rpc_pipe_msg *msg;
572 struct idmap_msg *im; 573 struct idmap_msg *im;
573 struct idmap *idmap = (struct idmap *)aux; 574 struct idmap *idmap = (struct idmap *)aux;
574 struct key *key = cons->key; 575 struct key *key = rka->target_key;
575 int ret = -ENOKEY; 576 int ret = -ENOKEY;
576 577
577 if (!aux) 578 if (!aux)
@@ -586,7 +587,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
586 msg = &data->pipe_msg; 587 msg = &data->pipe_msg;
587 im = &data->idmap_msg; 588 im = &data->idmap_msg;
588 data->idmap = idmap; 589 data->idmap = idmap;
589 data->key_cons = cons; 590 data->authkey = key_get(authkey);
590 591
591 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); 592 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
592 if (ret < 0) 593 if (ret < 0)
@@ -604,7 +605,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
604out2: 605out2:
605 kfree(data); 606 kfree(data);
606out1: 607out1:
607 complete_request_key(cons, ret); 608 complete_request_key(authkey, ret);
608 return ret; 609 return ret;
609} 610}
610 611
@@ -651,9 +652,10 @@ out:
651static ssize_t 652static ssize_t
652idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) 653idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
653{ 654{
655 struct request_key_auth *rka;
654 struct rpc_inode *rpci = RPC_I(file_inode(filp)); 656 struct rpc_inode *rpci = RPC_I(file_inode(filp));
655 struct idmap *idmap = (struct idmap *)rpci->private; 657 struct idmap *idmap = (struct idmap *)rpci->private;
656 struct key_construction *cons; 658 struct key *authkey;
657 struct idmap_msg im; 659 struct idmap_msg im;
658 size_t namelen_in; 660 size_t namelen_in;
659 int ret = -ENOKEY; 661 int ret = -ENOKEY;
@@ -665,7 +667,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
665 if (idmap->idmap_upcall_data == NULL) 667 if (idmap->idmap_upcall_data == NULL)
666 goto out_noupcall; 668 goto out_noupcall;
667 669
668 cons = idmap->idmap_upcall_data->key_cons; 670 authkey = idmap->idmap_upcall_data->authkey;
671 rka = get_request_key_auth(authkey);
669 672
670 if (mlen != sizeof(im)) { 673 if (mlen != sizeof(im)) {
671 ret = -ENOSPC; 674 ret = -ENOSPC;
@@ -690,9 +693,9 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
690 693
691 ret = nfs_idmap_read_and_verify_message(&im, 694 ret = nfs_idmap_read_and_verify_message(&im,
692 &idmap->idmap_upcall_data->idmap_msg, 695 &idmap->idmap_upcall_data->idmap_msg,
693 cons->key, cons->authkey); 696 rka->target_key, authkey);
694 if (ret >= 0) { 697 if (ret >= 0) {
695 key_set_timeout(cons->key, nfs_idmap_cache_timeout); 698 key_set_timeout(rka->target_key, nfs_idmap_cache_timeout);
696 ret = mlen; 699 ret = mlen;
697 } 700 }
698 701
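
Beyond the API migration, the idmap conversion above tightens key lifetime handling: the upcall pins the authorization key with key_get() before stashing it for the asynchronous downcall, and the completion path drops that reference. The get/stash/put discipline in a minimal model (stand-in struct key and helpers, not the kernel keyring API):

#include <stdio.h>

struct key { int refcount; };

static struct key *key_get(struct key *k) { k->refcount++; return k; }
static void key_put(struct key *k) { k->refcount--; }

struct upcall_data { struct key *authkey; };

static void upcall(struct upcall_data *d, struct key *authkey)
{
	d->authkey = key_get(authkey);	/* pin for the async downcall */
}

static void complete(struct upcall_data *d)
{
	/* ... complete_request_key(d->authkey, ret) equivalent ... */
	key_put(d->authkey);	/* balance the key_get() in upcall() */
	d->authkey = NULL;
}

int main(void)
{
	struct key k = { .refcount = 1 };
	struct upcall_data d;

	upcall(&d, &k);
	complete(&d);
	printf("refcount back to %d\n", k.refcount);	/* prints 1 */
	return 0;
}
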
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 22ce3c8a2f46..0570391eaa16 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1895,6 +1895,11 @@ static int nfs_parse_devname(const char *dev_name,
1895 size_t len; 1895 size_t len;
1896 char *end; 1896 char *end;
1897 1897
1898 if (unlikely(!dev_name || !*dev_name)) {
1899 dfprintk(MOUNT, "NFS: device name not specified\n");
1900 return -EINVAL;
1901 }
1902
 1898 /* Is the host name protected with square brackets? */ 1903 /* Is the host name protected with square brackets? */
1899 if (*dev_name == '[') { 1904 if (*dev_name == '[') {
1900 end = strchr(++dev_name, ']'); 1905 end = strchr(++dev_name, ']');
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5a0bbf917a32..d09c9f878141 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -238,9 +238,9 @@ out:
 }
 
 /* A writeback failed: mark the page as bad, and invalidate the page cache */
-static void nfs_set_pageerror(struct page *page)
+static void nfs_set_pageerror(struct address_space *mapping)
 {
-	nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
+	nfs_zap_mapping(mapping->host, mapping);
 }
 
 /*
@@ -621,11 +621,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	nfs_set_page_writeback(page);
 	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
 
-	ret = 0;
+	ret = req->wb_context->error;
 	/* If there is a fatal error that covers this write, just exit */
-	if (nfs_error_is_fatal_on_server(req->wb_context->error))
+	if (nfs_error_is_fatal_on_server(ret))
 		goto out_launder;
 
+	ret = 0;
 	if (!nfs_pageio_add_request(pgio, req)) {
 		ret = pgio->pg_error;
 		/*
@@ -635,9 +636,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 			nfs_context_set_write_error(req->wb_context, ret);
 			if (nfs_error_is_fatal_on_server(ret))
 				goto out_launder;
-		}
+		} else
+			ret = -EAGAIN;
 		nfs_redirty_request(req);
-		ret = -EAGAIN;
 	} else
 		nfs_add_stats(page_file_mapping(page)->host,
 			      NFSIOS_WRITEPAGES, 1);
@@ -993,7 +994,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 		nfs_list_remove_request(req);
 		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
 		    (hdr->good_bytes < bytes)) {
-			nfs_set_pageerror(req->wb_page);
+			nfs_set_pageerror(page_file_mapping(req->wb_page));
 			nfs_context_set_write_error(req->wb_context, hdr->error);
 			goto remove_req;
 		}
@@ -1347,7 +1348,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 		unsigned int offset, unsigned int count)
 {
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
-	struct inode *inode = page_file_mapping(page)->host;
+	struct address_space *mapping = page_file_mapping(page);
+	struct inode *inode = mapping->host;
 	int status = 0;
 
 	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -1365,7 +1367,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 	status = nfs_writepage_setup(ctx, page, offset, count);
 	if (status < 0)
-		nfs_set_pageerror(page);
+		nfs_set_pageerror(mapping);
 	else
 		__set_page_dirty_nobuffers(page);
 out:
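The nfs_page_async_flush() reordering is easiest to read as a decision table: the open context's error is consulted first, and -EAGAIN is produced only when the request was actually redirtied. A compilable userspace model of the fixed control flow; helper names and the "fatal" error subset are stand-ins, not NFS code:

/* Compilable model of the reordered error path in nfs_page_async_flush(). */
#include <stdio.h>
#include <errno.h>

static int fatal_on_server(int err)
{
	return err == -EIO || err == -ESTALE;	/* toy subset */
}

static int flush_one(int ctx_error, int add_ok, int pg_error)
{
	int ret = ctx_error;			/* consult the context first */

	if (fatal_on_server(ret))
		return ret;			/* out_launder: no retry */

	ret = 0;
	if (!add_ok) {
		ret = pg_error;
		if (fatal_on_server(ret))
			return ret;		/* out_launder */
		ret = -EAGAIN;			/* redirtied: retry later */
	}
	return ret;
}

int main(void)
{
	printf("%d\n", flush_one(-EIO, 1, 0));		/* -5: fatal, surfaced */
	printf("%d\n", flush_one(0, 0, -ENOMEM));	/* -11: retry via -EAGAIN */
	return 0;
}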
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b33f9785b756..72a7681f4046 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1239,8 +1239,8 @@ static __net_init int nfsd_init_net(struct net *net)
 	retval = nfsd_idmap_init(net);
 	if (retval)
 		goto out_idmap_error;
-	nn->nfsd4_lease = 45;	/* default lease time */
-	nn->nfsd4_grace = 45;
+	nn->nfsd4_lease = 90;	/* default lease time */
+	nn->nfsd4_grace = 90;
 	nn->somebody_reclaimed = false;
 	nn->clverifier_counter = prandom_u32();
 	nn->clientid_counter = prandom_u32();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9824e32b2f23..7dc98e14655d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -557,9 +557,11 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 	loff_t cloned;
 
 	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+	if (cloned < 0)
+		return nfserrno(cloned);
 	if (count && cloned != count)
-		cloned = -EINVAL;
-	return nfserrno(cloned < 0 ? cloned : 0);
+		return nfserrno(-EINVAL);
+	return 0;
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
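Before this change a negative return from vfs_clone_file_range() could be overwritten by the short-clone test (count && cloned != count), so the client saw EINVAL instead of the real error such as EXDEV. A small compilable model of the fixed ordering; plain integers stand in for __be32 status and nfserrno():

/* Compilable model of the reordered clone-result checks. */
#include <stdio.h>
#include <errno.h>

static long long check_clone(long long cloned, unsigned long long count)
{
	if (cloned < 0)
		return cloned;		/* propagate the real error first */
	if (count && (unsigned long long)cloned != count)
		return -EINVAL;		/* short clone of an explicit length */
	return 0;
}

int main(void)
{
	printf("%lld\n", check_clone(-EXDEV, 4096));	/* -18, no longer -22 */
	printf("%lld\n", check_clone(512, 4096));	/* -22 */
	printf("%lld\n", check_clone(4096, 4096));	/* 0 */
	return 0;
}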
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 105576daca4a..798f1253141a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -724,8 +724,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 		return -EBADF;
 
 	/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
-	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
-		return -EINVAL;
+	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) {
+		ret = -EINVAL;
+		goto fput_and_out;
+	}
 
 	/* verify that this is indeed an inotify instance */
 	if (unlikely(f.file->f_op != &inotify_fops)) {
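The point of the goto: the mask check sits after fdget(), so the old direct return leaked a file reference on every rejected call. A compilable model of the fixed exit discipline (all names are stand-ins):

/* Compilable model of the fd-leak fix: every error exit after fdget()
 * must drop the file reference. */
#include <stdio.h>

static int refs;
static void fdget_model(void) { refs++; }
static void fdput_model(void) { refs--; }

static int add_watch_model(unsigned int mask_add, unsigned int mask_create)
{
	int ret = 0;

	fdget_model();
	if (mask_add && mask_create) {
		ret = -22;		/* -EINVAL */
		goto fput_and_out;	/* old code returned here, leaking */
	}
	/* ... validate the inotify instance, install the watch ... */
fput_and_out:
	fdput_model();
	return ret;
}

int main(void)
{
	add_watch_model(1, 1);
	printf("leaked refs: %d\n", refs);	/* 0 with the goto in place */
	return 0;
}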
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 633a63462573..f5ed9512d193 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1086,10 +1086,6 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
 
 		task_lock(p);
 		if (!p->vfork_done && process_shares_mm(p, mm)) {
-			pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
-				task_pid_nr(p), p->comm,
-				p->signal->oom_score_adj, oom_adj,
-				task_pid_nr(task), task->comm);
 			p->signal->oom_score_adj = oom_adj;
 			if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
 				p->signal->oom_score_adj_min = (short)oom_adj;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 8ae109429a88..e39bac94dead 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
 		inode = proc_get_inode(dir->i_sb, de);
 		if (!inode)
 			return ERR_PTR(-ENOMEM);
-		d_set_d_op(dentry, &proc_misc_dentry_ops);
+		d_set_d_op(dentry, de->proc_dops);
 		return d_splice_alias(inode, dentry);
 	}
 	read_unlock(&proc_subdir_lock);
@@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	INIT_LIST_HEAD(&ent->pde_openers);
 	proc_set_user(ent, (*parent)->uid, (*parent)->gid);
 
+	ent->proc_dops = &proc_misc_dentry_ops;
+
 out:
 	return ent;
 }
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5185d7f6a51e..95b14196f284 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,6 +44,7 @@ struct proc_dir_entry {
 	struct completion *pde_unload_completion;
 	const struct inode_operations *proc_iops;
 	const struct file_operations *proc_fops;
+	const struct dentry_operations *proc_dops;
 	union {
 		const struct seq_operations *seq_ops;
 		int (*single_show)(struct seq_file *, void *);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index d5e0fcb3439e..a7b12435519e 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode)
 	return maybe_get_net(PDE_NET(PDE(inode)));
 }
 
+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	return 0;
+}
+
+static const struct dentry_operations proc_net_dentry_ops = {
+	.d_revalidate	= proc_net_d_revalidate,
+	.d_delete	= always_delete_dentry,
+};
+
+static void pde_force_lookup(struct proc_dir_entry *pde)
+{
+	/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+	pde->proc_dops = &proc_net_dentry_ops;
+}
+
 static int seq_open_net(struct inode *inode, struct file *file)
 {
 	unsigned int state_size = PDE(inode)->state_size;
@@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_seq_fops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
@@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_seq_fops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
@@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_single_fops;
 	p->single_show = show;
 	return proc_register(parent, p);
@@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_single_fops;
 	p->single_show = show;
 	p->write = write;
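pde_force_lookup() works because a d_revalidate that always returns 0 tells the VFS the cached dentry is never valid, so every path walk repeats ->lookup(), and the lookup resolves against the walker's current network namespace. A small userspace model of that control flow; all names here are hypothetical:

/* Userspace model: returning 0 from revalidate forces a fresh lookup. */
#include <stdio.h>

struct dentry_model { int filled_from_ns; };

static int proc_net_d_revalidate_model(struct dentry_model *d)
{
	(void)d;
	return 0;			/* never trust the dcache entry */
}

static const char *lookup_model(int current_ns)
{
	return current_ns == 1 ? "view of netns 1" : "view of netns 2";
}

static const char *walk_model(struct dentry_model *cached, int current_ns)
{
	if (!proc_net_d_revalidate_model(cached))
		return lookup_model(current_ns);	/* forced fresh lookup */
	return "stale cached view";
}

int main(void)
{
	struct dentry_model d = { .filled_from_ns = 1 };

	printf("%s\n", walk_model(&d, 1));
	printf("%s\n", walk_model(&d, 2));	/* after setns(CLONE_NEWNET) */
	return 0;
}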
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0ec9edab2f3..85b0ef890b28 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -423,7 +423,7 @@ struct mem_size_stats {
 };
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
-		bool compound, bool young, bool dirty)
+		bool compound, bool young, bool dirty, bool locked)
 {
 	int i, nr = compound ? 1 << compound_order(page) : 1;
 	unsigned long size = nr * PAGE_SIZE;
@@ -450,24 +450,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 		else
 			mss->private_clean += size;
 		mss->pss += (u64)size << PSS_SHIFT;
+		if (locked)
+			mss->pss_locked += (u64)size << PSS_SHIFT;
 		return;
 	}
 
 	for (i = 0; i < nr; i++, page++) {
 		int mapcount = page_mapcount(page);
+		unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
 
 		if (mapcount >= 2) {
 			if (dirty || PageDirty(page))
 				mss->shared_dirty += PAGE_SIZE;
 			else
 				mss->shared_clean += PAGE_SIZE;
-			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+			mss->pss += pss / mapcount;
+			if (locked)
+				mss->pss_locked += pss / mapcount;
 		} else {
 			if (dirty || PageDirty(page))
 				mss->private_dirty += PAGE_SIZE;
 			else
 				mss->private_clean += PAGE_SIZE;
-			mss->pss += PAGE_SIZE << PSS_SHIFT;
+			mss->pss += pss;
+			if (locked)
+				mss->pss_locked += pss;
 		}
 	}
 }
@@ -490,6 +497,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = walk->vma;
+	bool locked = !!(vma->vm_flags & VM_LOCKED);
 	struct page *page = NULL;
 
 	if (pte_present(*pte)) {
@@ -532,7 +540,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	if (!page)
 		return;
 
-	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
+	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -541,6 +549,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = walk->vma;
+	bool locked = !!(vma->vm_flags & VM_LOCKED);
 	struct page *page;
 
 	/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -555,7 +564,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 		/* pass */;
 	else
 		VM_BUG_ON_PAGE(1, page);
-	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
+	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -737,11 +746,8 @@ static void smap_gather_stats(struct vm_area_struct *vma,
 		}
 	}
 #endif
-
 	/* mmap_sem is held in m_start */
 	walk_page_vma(vma, &smaps_walk);
-	if (vma->vm_flags & VM_LOCKED)
-		mss->pss_locked += mss->pss;
 }
 
 #define SEQ_PUT_DEC(str, val) \
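The per-page arithmetic is the point of the pss_locked fix: previously a VM_LOCKED vma added the whole accumulated pss, including pages from vmas visited earlier in the walk, to pss_locked; now each page adds the same proportional share to both counters. A compilable worked example of the fixed-point math; PSS_SHIFT matches the kernel's value of 12, everything else is a standalone toy:

/* Worked model of the smaps PSS accounting above. */
#include <stdio.h>

#define PSS_SHIFT 12
#define PAGE_SIZE 4096ULL

int main(void)
{
	unsigned long long pss = 0, pss_locked = 0;
	int locked = 1;				/* vma has VM_LOCKED set */

	/* one private page: full page charged */
	pss += PAGE_SIZE << PSS_SHIFT;
	if (locked)
		pss_locked += PAGE_SIZE << PSS_SHIFT;

	/* one page shared by three processes: one third of a page charged */
	unsigned long long share = (PAGE_SIZE << PSS_SHIFT) / 3;
	pss += share;
	if (locked)
		pss_locked += share;

	/* 4096 + 1365 bytes, reported as 5 kB after the shift */
	printf("Pss:    %llu kB\n", (pss >> PSS_SHIFT) / 1024);
	printf("Locked: %llu kB\n", (pss_locked >> PSS_SHIFT) / 1024);
	return 0;
}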
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 96f7d32cd184..898c8321b343 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -128,7 +128,6 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
 		     struct pstore_record *record)
 {
 	struct persistent_ram_zone *prz;
-	bool update = (record->type == PSTORE_TYPE_DMESG);
 
 	/* Give up if we never existed or have hit the end. */
 	if (!przs)
@@ -139,7 +138,7 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
 		return NULL;
 
 	/* Update old/shadowed buffer. */
-	if (update)
+	if (prz->type == PSTORE_TYPE_DMESG)
 		persistent_ram_save_old(prz);
 
 	if (!persistent_ram_old_size(prz))
@@ -711,18 +710,15 @@ static int ramoops_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ramoops_platform_data *pdata = dev->platform_data;
+	struct ramoops_platform_data pdata_local;
 	struct ramoops_context *cxt = &oops_cxt;
 	size_t dump_mem_sz;
 	phys_addr_t paddr;
 	int err = -EINVAL;
 
 	if (dev_of_node(dev) && !pdata) {
-		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-		if (!pdata) {
-			pr_err("cannot allocate platform data buffer\n");
-			err = -ENOMEM;
-			goto fail_out;
-		}
+		pdata = &pdata_local;
+		memset(pdata, 0, sizeof(*pdata));
 
 		err = ramoops_parse_dt(pdev, pdata);
 		if (err < 0)
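A sketch of the allocation change: when the parsed platform data is only consumed during probe, a zeroed stack copy replaces a device-managed allocation that had no reason to outlive the call. Compilable userspace model with hypothetical names:

/* Model of the probe-local platform data pattern. */
#include <stdio.h>
#include <string.h>

struct pdata_model {
	unsigned long mem_size;
	unsigned long record_size;
};

static int parse_dt_model(struct pdata_model *p)
{
	p->mem_size = 0x40000;		/* made-up DT values */
	p->record_size = 0x1000;
	return 0;
}

static int probe_model(const struct pdata_model *pdata)
{
	struct pdata_model pdata_local;

	if (!pdata) {			/* OF probe: no platform data passed in */
		memset(&pdata_local, 0, sizeof(pdata_local));
		if (parse_dt_model(&pdata_local) < 0)
			return -1;
		pdata = &pdata_local;
	}
	printf("mem_size=%#lx record_size=%#lx\n",
	       pdata->mem_size, pdata->record_size);
	return 0;
}

int main(void)
{
	return probe_model(NULL);
}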
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index feeae8081c22..aa85f2874a9f 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,7 +43,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 	kuid_t uid;
 	kgid_t gid;
 
-	BUG_ON(!kobj);
+	if (WARN_ON(!kobj))
+		return -EINVAL;
 
 	if (kobj->parent)
 		parent = kobj->parent->sd;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index bb71db63c99c..51398457fe00 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -325,7 +325,8 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
 	kuid_t uid;
 	kgid_t gid;
 
-	BUG_ON(!kobj || !kobj->sd || !attr);
+	if (WARN_ON(!kobj || !kobj->sd || !attr))
+		return -EINVAL;
 
 	kobject_get_ownership(kobj, &uid, &gid);
 	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
@@ -537,7 +538,8 @@ int sysfs_create_bin_file(struct kobject *kobj,
 	kuid_t uid;
 	kgid_t gid;
 
-	BUG_ON(!kobj || !kobj->sd || !attr);
+	if (WARN_ON(!kobj || !kobj->sd || !attr))
+		return -EINVAL;
 
 	kobject_get_ownership(kobj, &uid, &gid);
 	return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1eb2d6307663..57038604d4a8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -112,7 +112,8 @@ static int internal_create_group(struct kobject *kobj, int update,
 	kgid_t gid;
 	int error;
 
-	BUG_ON(!kobj || (!update && !kobj->sd));
+	if (WARN_ON(!kobj || (!update && !kobj->sd)))
+		return -EINVAL;
 
 	/* Updates may happen before the object has been instantiated */
 	if (unlikely(update && !kobj->sd))
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 215c225b2ca1..c4deecc80f67 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -23,7 +23,8 @@ static int sysfs_do_create_link_sd(struct kernfs_node *parent,
 {
 	struct kernfs_node *kn, *target = NULL;
 
-	BUG_ON(!name || !parent);
+	if (WARN_ON(!name || !parent))
+		return -EINVAL;
 
 	/*
 	 * We don't own @target_kobj and it may be removed at any time.
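The four sysfs changes above share one pattern: a NULL argument now produces a backtrace and a recoverable -EINVAL instead of halting the machine. A compilable userspace model with a mocked WARN_ON (the real macro also returns the condition's truth value, which is what makes the if-return idiom work):

/* Userspace model of the BUG_ON -> WARN_ON conversion. */
#include <stdio.h>

#define WARN_ON(cond) \
	((cond) ? (fprintf(stderr, "WARNING at %s:%d\n", __FILE__, __LINE__), 1) : 0)

struct kobj_model { const char *name; };

static int create_dir_model(const struct kobj_model *kobj)
{
	if (WARN_ON(!kobj))
		return -22;		/* -EINVAL back to the caller */
	printf("created %s\n", kobj->name);
	return 0;
}

int main(void)
{
	struct kobj_model k = { "demo" };

	create_dir_model(NULL);		/* warns, returns, keeps running */
	return create_dir_model(&k);
}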
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 1c8eecfe52b8..6acf1bfa0bfe 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -768,18 +768,23 @@ xrep_findroot_block(
 		if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
 				&mp->m_sb.sb_meta_uuid))
 			goto out;
+		/*
+		 * Read verifiers can reference b_ops, so we set the pointer
+		 * here. If the verifier fails we'll reset the buffer state
+		 * to what it was before we touched the buffer.
+		 */
+		bp->b_ops = fab->buf_ops;
 		fab->buf_ops->verify_read(bp);
 		if (bp->b_error) {
+			bp->b_ops = NULL;
 			bp->b_error = 0;
 			goto out;
 		}
 
 		/*
 		 * Some read verifiers will (re)set b_ops, so we must be
-		 * careful not to blow away any such assignment.
+		 * careful not to change b_ops after running the verifier.
 		 */
-		if (!bp->b_ops)
-			bp->b_ops = fab->buf_ops;
 	}
 
 	/*
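The repair probe has to assign b_ops before invoking the candidate verifier, because verifiers may reach back through b_ops, and it must fully unwind the buffer state when the guess is wrong. A compilable model of that probe-and-unwind pattern; all types are stand-ins, and -117 mimics EFSCORRUPTED:

/* Model of the set-ops-then-verify probe in xrep_findroot_block(). */
#include <stdio.h>

struct buf_model;

struct ops_model {
	void (*verify_read)(struct buf_model *bp);
};

struct buf_model {
	const struct ops_model *ops;
	int error;
};

/* stand-in verifier: real XFS verifiers may consult the buffer's ops
 * via helpers, which is why ops must be assigned before the call */
static void verify_btree(struct buf_model *bp)
{
	bp->error = bp->ops ? 0 : -117;
}

static int findroot_try(struct buf_model *bp, const struct ops_model *fab_ops)
{
	bp->ops = fab_ops;		/* visible while the verifier runs */
	fab_ops->verify_read(bp);
	if (bp->error) {
		bp->ops = NULL;		/* unwind to the pre-probe state */
		bp->error = 0;
		return 0;		/* not this btree type, keep searching */
	}
	return 1;			/* match: leave ops in place */
}

int main(void)
{
	const struct ops_model ops = { verify_btree };
	struct buf_model bp = { 0 };

	printf("match=%d ops_set=%d\n", findroot_try(&bp, &ops), bp.ops != NULL);
	return 0;
}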
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 338b9d9984e0..d9048bcea49c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -449,6 +449,7 @@ xfs_map_blocks(
 	}
 
 	wpc->imap = imap;
+	xfs_trim_extent_eof(&wpc->imap, ip);
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 allocate_blocks:
@@ -459,6 +460,7 @@ allocate_blocks:
 	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
 	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
+	xfs_trim_extent_eof(&wpc->imap, ip);
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index eedc5e0156ff..4f5f2ff3f70f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -776,10 +776,26 @@ _xfs_buf_read(
 }
 
 /*
+ * Set buffer ops on an unchecked buffer and validate it, if possible.
+ *
  * If the caller passed in an ops structure and the buffer doesn't have ops
  * assigned, set the ops and use them to verify the contents. If the contents
  * cannot be verified, we'll clear XBF_DONE. We assume the buffer has no
  * recorded errors and is already in XBF_DONE state.
+ *
+ * Under normal operations, every in-core buffer must have buffer ops assigned
+ * to them when the buffer is read in from disk so that we can validate the
+ * metadata.
+ *
+ * However, there are two scenarios where one can encounter in-core buffers
+ * that don't have buffer ops. The first is during log recovery of buffers on
+ * a V4 filesystem, though these buffers are purged at the end of recovery.
+ *
+ * The other is online repair, which tries to match arbitrary metadata blocks
+ * with btree types in order to find the root. If online repair doesn't match
+ * the buffer with /any/ btree type, the buffer remains in memory in DONE state
+ * with no ops, and a subsequent read_buf call from elsewhere will not set the
+ * ops. This function helps us fix this situation.
  */
 int
 xfs_buf_ensure_ops(
@@ -1536,8 +1552,7 @@ __xfs_buf_submit(
 		xfs_buf_ioerror(bp, -EIO);
 		bp->b_flags &= ~XBF_DONE;
 		xfs_buf_stale(bp);
-		if (bp->b_flags & XBF_ASYNC)
-			xfs_buf_ioend(bp);
+		xfs_buf_ioend(bp);
 		return -EIO;
 	}
 
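The submit change matters most for synchronous buffers: on a shut-down filesystem the early-failure path used to call xfs_buf_ioend() only for XBF_ASYNC buffers, leaving synchronous submitters without completion processing. A toy model of the fixed flow, with stand-in types and values:

/* Model of the early-failure path in __xfs_buf_submit(). */
#include <stdio.h>

#define XBF_ASYNC	0x1
#define XBF_DONE	0x2

struct buf_model {
	unsigned int flags;
	int error;
	int ioend_ran;
};

static void ioend_model(struct buf_model *bp)
{
	bp->ioend_ran = 1;		/* completes waiters, runs callbacks */
}

static int submit_model(struct buf_model *bp, int fs_shut_down)
{
	if (fs_shut_down) {
		bp->error = -5;		/* -EIO */
		bp->flags &= ~XBF_DONE;
		ioend_model(bp);	/* previously skipped unless XBF_ASYNC */
		return -5;
	}
	/* ... queue the I/O ... */
	return 0;
}

int main(void)
{
	struct buf_model sync_buf = { XBF_DONE, 0, 0 };

	submit_model(&sync_buf, 1);
	printf("sync submitter saw ioend: %d\n", sync_buf.ioend_ran);	/* 1 */
	return 0;
}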