aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c3
-rw-r--r--fs/9p/vfs_dir.c6
-rw-r--r--fs/9p/vfs_inode.c9
-rw-r--r--fs/9p/vfs_super.c20
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/bio-integrity.c4
-rw-r--r--fs/ceph/addr.c12
-rw-r--r--fs/ceph/auth_x.c15
-rw-r--r--fs/ceph/caps.c32
-rw-r--r--fs/ceph/debugfs.c4
-rw-r--r--fs/ceph/dir.c2
-rw-r--r--fs/ceph/inode.c5
-rw-r--r--fs/ceph/locks.c14
-rw-r--r--fs/ceph/mds_client.c101
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/ceph/osd_client.c2
-rw-r--r--fs/ceph/snap.c89
-rw-r--r--fs/ceph/super.h11
-rw-r--r--fs/ceph/xattr.c1
-rw-r--r--fs/cifs/cifs_unicode.h18
-rw-r--r--fs/cifs/cifs_uniupr.h16
-rw-r--r--fs/cifs/cifsencrypt.c57
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/connect.c62
-rw-r--r--fs/cifs/dir.c157
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/cifs/inode.c32
-rw-r--r--fs/cifs/netmisc.c22
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/ecryptfs/crypto.c3
-rw-r--r--fs/ecryptfs/inode.c31
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/ecryptfs/kthread.c2
-rw-r--r--fs/ecryptfs/messaging.c2
-rw-r--r--fs/ecryptfs/miscdev.c2
-rw-r--r--fs/exec.c14
-rw-r--r--fs/fcntl.c10
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/dev.c42
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namespace.c23
-rw-r--r--fs/nfsd/nfs4state.c28
-rw-r--r--fs/nfsd/state.h14
-rw-r--r--fs/nfsd/vfs.c14
-rw-r--r--fs/nilfs2/the_nilfs.c1
-rw-r--r--fs/notify/fanotify/fanotify.c3
-rw-r--r--fs/notify/fanotify/fanotify_user.c29
-rw-r--r--fs/notify/fsnotify.c68
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/blockcheck.c4
-rw-r--r--fs/ocfs2/file.c15
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/ocfs2/mmap.c8
-rw-r--r--fs/ocfs2/namei.c302
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/ocfs2/suballoc.c219
-rw-r--r--fs/ocfs2/suballoc.h21
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c13
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c42
-rw-r--r--fs/xfs/xfs_bmap.c14
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_fsops.c31
-rw-r--r--fs/xfs/xfs_fsops.h2
-rw-r--r--fs/xfs/xfs_ialloc.c16
-rw-r--r--fs/xfs/xfs_inode.c49
-rw-r--r--fs/xfs/xfs_log.c7
-rw-r--r--fs/xfs/xfs_log_cil.c263
-rw-r--r--fs/xfs/xfs_log_priv.h13
-rw-r--r--fs/xfs/xfs_trans.c5
-rw-r--r--fs/xfs/xfs_trans_priv.h3
-rw-r--r--fs/xfs/xfs_vnodeops.c13
80 files changed, 1331 insertions, 776 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 358563689064..6406f896bf95 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -242,7 +242,8 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
242 } 242 }
243 kfree(wnames); 243 kfree(wnames);
244fid_out: 244fid_out:
245 v9fs_fid_add(dentry, fid); 245 if (!IS_ERR(fid))
246 v9fs_fid_add(dentry, fid);
246err_out: 247err_out:
247 up_read(&v9ses->rename_sem); 248 up_read(&v9ses->rename_sem);
248 return fid; 249 return fid;
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 16c8a2a98c1b..899f168fd19c 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -292,9 +292,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
292 292
293 fid = filp->private_data; 293 fid = filp->private_data;
294 P9_DPRINTK(P9_DEBUG_VFS, 294 P9_DPRINTK(P9_DEBUG_VFS,
295 "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); 295 "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
296 inode, filp, fid ? fid->fid : -1);
296 filemap_write_and_wait(inode->i_mapping); 297 filemap_write_and_wait(inode->i_mapping);
297 p9_client_clunk(fid); 298 if (fid)
299 p9_client_clunk(fid);
298 return 0; 300 return 0;
299} 301}
300 302
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c7c23eab9440..9e670d527646 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
730 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 730 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
731 goto error; 731 goto error;
732 } 732 }
733 dentry->d_op = &v9fs_cached_dentry_operations; 733 if (v9ses->cache)
734 dentry->d_op = &v9fs_cached_dentry_operations;
735 else
736 dentry->d_op = &v9fs_dentry_operations;
734 d_instantiate(dentry, inode); 737 d_instantiate(dentry, inode);
735 err = v9fs_fid_add(dentry, fid); 738 err = v9fs_fid_add(dentry, fid);
736 if (err < 0) 739 if (err < 0)
@@ -1128,6 +1131,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1128 v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); 1131 v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
1129 generic_fillattr(dentry->d_inode, stat); 1132 generic_fillattr(dentry->d_inode, stat);
1130 1133
1134 p9stat_free(st);
1131 kfree(st); 1135 kfree(st);
1132 return 0; 1136 return 0;
1133} 1137}
@@ -1489,6 +1493,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1489 1493
1490 retval = strnlen(buffer, buflen); 1494 retval = strnlen(buffer, buflen);
1491done: 1495done:
1496 p9stat_free(st);
1492 kfree(st); 1497 kfree(st);
1493 return retval; 1498 return retval;
1494} 1499}
@@ -1942,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1942 .unlink = v9fs_vfs_unlink, 1947 .unlink = v9fs_vfs_unlink,
1943 .mkdir = v9fs_vfs_mkdir, 1948 .mkdir = v9fs_vfs_mkdir,
1944 .rmdir = v9fs_vfs_rmdir, 1949 .rmdir = v9fs_vfs_rmdir,
1945 .mknod = v9fs_vfs_mknod_dotl, 1950 .mknod = v9fs_vfs_mknod,
1946 .rename = v9fs_vfs_rename, 1951 .rename = v9fs_vfs_rename,
1947 .getattr = v9fs_vfs_getattr, 1952 .getattr = v9fs_vfs_getattr,
1948 .setattr = v9fs_vfs_setattr, 1953 .setattr = v9fs_vfs_setattr,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f9311077de68..1d12ba0ed3db 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
122 fid = v9fs_session_init(v9ses, dev_name, data); 122 fid = v9fs_session_init(v9ses, dev_name, data);
123 if (IS_ERR(fid)) { 123 if (IS_ERR(fid)) {
124 retval = PTR_ERR(fid); 124 retval = PTR_ERR(fid);
125 /*
126 * we need to call session_close to tear down some
127 * of the data structure setup by session_init
128 */
125 goto close_session; 129 goto close_session;
126 } 130 }
127 131
@@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
144 retval = -ENOMEM; 148 retval = -ENOMEM;
145 goto release_sb; 149 goto release_sb;
146 } 150 }
147
148 sb->s_root = root; 151 sb->s_root = root;
149 152
150 if (v9fs_proto_dotl(v9ses)) { 153 if (v9fs_proto_dotl(v9ses)) {
@@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
152 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 155 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
153 if (IS_ERR(st)) { 156 if (IS_ERR(st)) {
154 retval = PTR_ERR(st); 157 retval = PTR_ERR(st);
155 goto clunk_fid; 158 goto release_sb;
156 } 159 }
157 160
158 v9fs_stat2inode_dotl(st, root->d_inode); 161 v9fs_stat2inode_dotl(st, root->d_inode);
@@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
162 st = p9_client_stat(fid); 165 st = p9_client_stat(fid);
163 if (IS_ERR(st)) { 166 if (IS_ERR(st)) {
164 retval = PTR_ERR(st); 167 retval = PTR_ERR(st);
165 goto clunk_fid; 168 goto release_sb;
166 } 169 }
167 170
168 root->d_inode->i_ino = v9fs_qid2ino(&st->qid); 171 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
@@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
174 177
175 v9fs_fid_add(root, fid); 178 v9fs_fid_add(root, fid);
176 179
177P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 180 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
178 simple_set_mnt(mnt, sb); 181 simple_set_mnt(mnt, sb);
179 return 0; 182 return 0;
180 183
181clunk_fid: 184clunk_fid:
182 p9_client_clunk(fid); 185 p9_client_clunk(fid);
183
184close_session: 186close_session:
185 v9fs_session_close(v9ses); 187 v9fs_session_close(v9ses);
186 kfree(v9ses); 188 kfree(v9ses);
187 return retval; 189 return retval;
188
189release_sb: 190release_sb:
191 /*
192 * we will do the session_close and root dentry release
193 * in the below call. But we need to clunk fid, because we haven't
194 * attached the fid to dentry so it won't get clunked
195 * automatically.
196 */
197 p9_client_clunk(fid);
190 deactivate_locked_super(sb); 198 deactivate_locked_super(sb);
191 return retval; 199 return retval;
192} 200}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a7528b913936..fd0cc0bf9a40 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void)
724{ 724{
725 int err = register_filesystem(&bm_fs_type); 725 int err = register_filesystem(&bm_fs_type);
726 if (!err) { 726 if (!err) {
727 err = register_binfmt(&misc_format); 727 err = insert_binfmt(&misc_format);
728 if (err) 728 if (err)
729 unregister_filesystem(&bm_fs_type); 729 unregister_filesystem(&bm_fs_type);
730 } 730 }
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 612a5c38d3c1..4d0ff5ee27b8 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -413,10 +413,10 @@ int bio_integrity_prep(struct bio *bio)
413 413
414 /* Allocate kernel buffer for protection data */ 414 /* Allocate kernel buffer for protection data */
415 len = sectors * blk_integrity_tuple_size(bi); 415 len = sectors * blk_integrity_tuple_size(bi);
416 buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp); 416 buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
417 if (unlikely(buf == NULL)) { 417 if (unlikely(buf == NULL)) {
418 printk(KERN_ERR "could not allocate integrity buffer\n"); 418 printk(KERN_ERR "could not allocate integrity buffer\n");
419 return -EIO; 419 return -ENOMEM;
420 } 420 }
421 421
422 end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 422 end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5598a0d02295..4cfce1ee31fa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
87 87
88 /* dirty the head */ 88 /* dirty the head */
89 spin_lock(&inode->i_lock); 89 spin_lock(&inode->i_lock);
90 if (ci->i_wrbuffer_ref_head == 0) 90 if (ci->i_head_snapc == NULL)
91 ci->i_head_snapc = ceph_get_snap_context(snapc); 91 ci->i_head_snapc = ceph_get_snap_context(snapc);
92 ++ci->i_wrbuffer_ref_head; 92 ++ci->i_wrbuffer_ref_head;
93 if (ci->i_wrbuffer_ref == 0) 93 if (ci->i_wrbuffer_ref == 0)
@@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page)
105 spin_lock_irq(&mapping->tree_lock); 105 spin_lock_irq(&mapping->tree_lock);
106 if (page->mapping) { /* Race with truncate? */ 106 if (page->mapping) { /* Race with truncate? */
107 WARN_ON_ONCE(!PageUptodate(page)); 107 WARN_ON_ONCE(!PageUptodate(page));
108 108 account_page_dirtied(page, page->mapping);
109 if (mapping_cap_account_dirty(mapping)) {
110 __inc_zone_page_state(page, NR_FILE_DIRTY);
111 __inc_bdi_stat(mapping->backing_dev_info,
112 BDI_RECLAIMABLE);
113 task_io_account_write(PAGE_CACHE_SIZE);
114 }
115 radix_tree_tag_set(&mapping->page_tree, 109 radix_tree_tag_set(&mapping->page_tree,
116 page_index(page), PAGECACHE_TAG_DIRTY); 110 page_index(page), PAGECACHE_TAG_DIRTY);
117 111
@@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
352 break; 346 break;
353 } 347 }
354 } 348 }
355 if (!snapc && ci->i_head_snapc) { 349 if (!snapc && ci->i_wrbuffer_ref_head) {
356 snapc = ceph_get_snap_context(ci->i_head_snapc); 350 snapc = ceph_get_snap_context(ci->i_head_snapc);
357 dout(" head snapc %p has %d dirty pages\n", 351 dout(" head snapc %p has %d dirty pages\n",
358 snapc, ci->i_wrbuffer_ref_head); 352 snapc, ci->i_wrbuffer_ref_head);
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 582e0b2caf8a..a2d002cbdec2 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
376 376
377 th = get_ticket_handler(ac, service); 377 th = get_ticket_handler(ac, service);
378 378
379 if (!th) { 379 if (IS_ERR(th)) {
380 *pneed |= service; 380 *pneed |= service;
381 continue; 381 continue;
382 } 382 }
@@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
399 struct ceph_x_ticket_handler *th = 399 struct ceph_x_ticket_handler *th =
400 get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); 400 get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
401 401
402 if (IS_ERR(th))
403 return PTR_ERR(th);
404
402 ceph_x_validate_tickets(ac, &need); 405 ceph_x_validate_tickets(ac, &need);
403 406
404 dout("build_request want %x have %x need %x\n", 407 dout("build_request want %x have %x need %x\n",
@@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
450 return -ERANGE; 453 return -ERANGE;
451 head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); 454 head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
452 455
453 BUG_ON(!th);
454 ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); 456 ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
455 if (ret) 457 if (ret)
456 return ret; 458 return ret;
@@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
505 507
506 case CEPHX_GET_PRINCIPAL_SESSION_KEY: 508 case CEPHX_GET_PRINCIPAL_SESSION_KEY:
507 th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); 509 th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
508 BUG_ON(!th); 510 if (IS_ERR(th))
511 return PTR_ERR(th);
509 ret = ceph_x_proc_ticket_reply(ac, &th->session_key, 512 ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
510 buf + sizeof(*head), end); 513 buf + sizeof(*head), end);
511 break; 514 break;
@@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
563 void *end = p + sizeof(au->reply_buf); 566 void *end = p + sizeof(au->reply_buf);
564 567
565 th = get_ticket_handler(ac, au->service); 568 th = get_ticket_handler(ac, au->service);
566 if (!th) 569 if (IS_ERR(th))
567 return -EIO; /* hrm! */ 570 return PTR_ERR(th);
568 ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); 571 ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply));
569 if (ret < 0) 572 if (ret < 0)
570 return ret; 573 return ret;
@@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
626 struct ceph_x_ticket_handler *th; 629 struct ceph_x_ticket_handler *th;
627 630
628 th = get_ticket_handler(ac, peer_type); 631 th = get_ticket_handler(ac, peer_type);
629 if (th && !IS_ERR(th)) 632 if (!IS_ERR(th))
630 remove_ticket_handler(ac, th); 633 remove_ticket_handler(ac, th);
631} 634}
632 635
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7bf182b03973..a2069b6680ae 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1082 gid_t gid; 1082 gid_t gid;
1083 struct ceph_mds_session *session; 1083 struct ceph_mds_session *session;
1084 u64 xattr_version = 0; 1084 u64 xattr_version = 0;
1085 struct ceph_buffer *xattr_blob = NULL;
1085 int delayed = 0; 1086 int delayed = 0;
1086 u64 flush_tid = 0; 1087 u64 flush_tid = 0;
1087 int i; 1088 int i;
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1142 for (i = 0; i < CEPH_CAP_BITS; i++) 1143 for (i = 0; i < CEPH_CAP_BITS; i++)
1143 if (flushing & (1 << i)) 1144 if (flushing & (1 << i))
1144 ci->i_cap_flush_tid[i] = flush_tid; 1145 ci->i_cap_flush_tid[i] = flush_tid;
1146
1147 follows = ci->i_head_snapc->seq;
1148 } else {
1149 follows = 0;
1145 } 1150 }
1146 1151
1147 keep = cap->implemented; 1152 keep = cap->implemented;
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1155 mtime = inode->i_mtime; 1160 mtime = inode->i_mtime;
1156 atime = inode->i_atime; 1161 atime = inode->i_atime;
1157 time_warp_seq = ci->i_time_warp_seq; 1162 time_warp_seq = ci->i_time_warp_seq;
1158 follows = ci->i_snap_realm->cached_context->seq;
1159 uid = inode->i_uid; 1163 uid = inode->i_uid;
1160 gid = inode->i_gid; 1164 gid = inode->i_gid;
1161 mode = inode->i_mode; 1165 mode = inode->i_mode;
1162 1166
1163 if (dropping & CEPH_CAP_XATTR_EXCL) { 1167 if (flushing & CEPH_CAP_XATTR_EXCL) {
1164 __ceph_build_xattrs_blob(ci); 1168 __ceph_build_xattrs_blob(ci);
1165 xattr_version = ci->i_xattrs.version + 1; 1169 xattr_blob = ci->i_xattrs.blob;
1170 xattr_version = ci->i_xattrs.version;
1166 } 1171 }
1167 1172
1168 spin_unlock(&inode->i_lock); 1173 spin_unlock(&inode->i_lock);
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1170 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1175 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1171 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1176 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
1172 size, max_size, &mtime, &atime, time_warp_seq, 1177 size, max_size, &mtime, &atime, time_warp_seq,
1173 uid, gid, mode, 1178 uid, gid, mode, xattr_version, xattr_blob,
1174 xattr_version,
1175 (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL,
1176 follows); 1179 follows);
1177 if (ret < 0) { 1180 if (ret < 0) {
1178 dout("error sending cap msg, must requeue %p\n", inode); 1181 dout("error sending cap msg, must requeue %p\n", inode);
@@ -1282,7 +1285,7 @@ retry:
1282 &capsnap->mtime, &capsnap->atime, 1285 &capsnap->mtime, &capsnap->atime,
1283 capsnap->time_warp_seq, 1286 capsnap->time_warp_seq,
1284 capsnap->uid, capsnap->gid, capsnap->mode, 1287 capsnap->uid, capsnap->gid, capsnap->mode,
1285 0, NULL, 1288 capsnap->xattr_version, capsnap->xattr_blob,
1286 capsnap->follows); 1289 capsnap->follows);
1287 1290
1288 next_follows = capsnap->follows + 1; 1291 next_follows = capsnap->follows + 1;
@@ -1332,7 +1335,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1332 ceph_cap_string(was | mask)); 1335 ceph_cap_string(was | mask));
1333 ci->i_dirty_caps |= mask; 1336 ci->i_dirty_caps |= mask;
1334 if (was == 0) { 1337 if (was == 0) {
1335 dout(" inode %p now dirty\n", &ci->vfs_inode); 1338 if (!ci->i_head_snapc)
1339 ci->i_head_snapc = ceph_get_snap_context(
1340 ci->i_snap_realm->cached_context);
1341 dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
1342 ci->i_head_snapc);
1336 BUG_ON(!list_empty(&ci->i_dirty_item)); 1343 BUG_ON(!list_empty(&ci->i_dirty_item));
1337 spin_lock(&mdsc->cap_dirty_lock); 1344 spin_lock(&mdsc->cap_dirty_lock);
1338 list_add(&ci->i_dirty_item, &mdsc->cap_dirty); 1345 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -2190,7 +2197,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2190 2197
2191 if (ci->i_head_snapc == snapc) { 2198 if (ci->i_head_snapc == snapc) {
2192 ci->i_wrbuffer_ref_head -= nr; 2199 ci->i_wrbuffer_ref_head -= nr;
2193 if (!ci->i_wrbuffer_ref_head) { 2200 if (ci->i_wrbuffer_ref_head == 0 &&
2201 ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
2202 BUG_ON(!ci->i_head_snapc);
2194 ceph_put_snap_context(ci->i_head_snapc); 2203 ceph_put_snap_context(ci->i_head_snapc);
2195 ci->i_head_snapc = NULL; 2204 ci->i_head_snapc = NULL;
2196 } 2205 }
@@ -2483,6 +2492,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2483 dout(" inode %p now clean\n", inode); 2492 dout(" inode %p now clean\n", inode);
2484 BUG_ON(!list_empty(&ci->i_dirty_item)); 2493 BUG_ON(!list_empty(&ci->i_dirty_item));
2485 drop = 1; 2494 drop = 1;
2495 if (ci->i_wrbuffer_ref_head == 0) {
2496 BUG_ON(!ci->i_head_snapc);
2497 ceph_put_snap_context(ci->i_head_snapc);
2498 ci->i_head_snapc = NULL;
2499 }
2486 } else { 2500 } else {
2487 BUG_ON(list_empty(&ci->i_dirty_item)); 2501 BUG_ON(list_empty(&ci->i_dirty_item));
2488 } 2502 }
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 360c4f22718d..6fd8b20a8611 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p)
171 } else if (req->r_dentry) { 171 } else if (req->r_dentry) {
172 path = ceph_mdsc_build_path(req->r_dentry, &pathlen, 172 path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
173 &pathbase, 0); 173 &pathbase, 0);
174 if (IS_ERR(path))
175 path = NULL;
174 spin_lock(&req->r_dentry->d_lock); 176 spin_lock(&req->r_dentry->d_lock);
175 seq_printf(s, " #%llx/%.*s (%s)", 177 seq_printf(s, " #%llx/%.*s (%s)",
176 ceph_ino(req->r_dentry->d_parent->d_inode), 178 ceph_ino(req->r_dentry->d_parent->d_inode),
@@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p)
187 if (req->r_old_dentry) { 189 if (req->r_old_dentry) {
188 path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, 190 path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
189 &pathbase, 0); 191 &pathbase, 0);
192 if (IS_ERR(path))
193 path = NULL;
190 spin_lock(&req->r_old_dentry->d_lock); 194 spin_lock(&req->r_old_dentry->d_lock);
191 seq_printf(s, " #%llx/%.*s (%s)", 195 seq_printf(s, " #%llx/%.*s (%s)",
192 ceph_ino(req->r_old_dentry->d_parent->d_inode), 196 ceph_ino(req->r_old_dentry->d_parent->d_inode),
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 67bbb41d5526..6e4f43ff23ec 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry)
46 else 46 else
47 dentry->d_op = &ceph_snap_dentry_ops; 47 dentry->d_op = &ceph_snap_dentry_ops;
48 48
49 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS); 49 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
50 if (!di) 50 if (!di)
51 return -ENOMEM; /* oh well */ 51 return -ENOMEM; /* oh well */
52 52
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5d893d31e399..e7cca414da03 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode,
677 if (ci->i_files == 0 && ci->i_subdirs == 0 && 677 if (ci->i_files == 0 && ci->i_subdirs == 0 &&
678 ceph_snap(inode) == CEPH_NOSNAP && 678 ceph_snap(inode) == CEPH_NOSNAP &&
679 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && 679 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
680 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
680 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 681 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
681 dout(" marking %p complete (empty)\n", inode); 682 dout(" marking %p complete (empty)\n", inode);
682 ci->i_ceph_flags |= CEPH_I_COMPLETE; 683 ci->i_ceph_flags |= CEPH_I_COMPLETE;
@@ -1229,11 +1230,11 @@ retry_lookup:
1229 in = dn->d_inode; 1230 in = dn->d_inode;
1230 } else { 1231 } else {
1231 in = ceph_get_inode(parent->d_sb, vino); 1232 in = ceph_get_inode(parent->d_sb, vino);
1232 if (in == NULL) { 1233 if (IS_ERR(in)) {
1233 dout("new_inode badness\n"); 1234 dout("new_inode badness\n");
1234 d_delete(dn); 1235 d_delete(dn);
1235 dput(dn); 1236 dput(dn);
1236 err = -ENOMEM; 1237 err = PTR_ERR(in);
1237 goto out; 1238 goto out;
1238 } 1239 }
1239 dn = splice_dentry(dn, in, NULL); 1240 dn = splice_dentry(dn, in, NULL);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index ae85af06454f..ff4e753aae92 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
82 length = fl->fl_end - fl->fl_start + 1; 82 length = fl->fl_end - fl->fl_start + 1;
83 83
84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
85 (u64)fl->fl_pid, (u64)fl->fl_nspid, 85 (u64)fl->fl_pid,
86 (u64)(unsigned long)fl->fl_nspid,
86 lock_cmd, fl->fl_start, 87 lock_cmd, fl->fl_start,
87 length, wait); 88 length, wait);
88 if (!err) { 89 if (!err) {
@@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
92 /* undo! This should only happen if the kernel detects 93 /* undo! This should only happen if the kernel detects
93 * local deadlock. */ 94 * local deadlock. */
94 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 95 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
95 (u64)fl->fl_pid, (u64)fl->fl_nspid, 96 (u64)fl->fl_pid,
97 (u64)(unsigned long)fl->fl_nspid,
96 CEPH_LOCK_UNLOCK, fl->fl_start, 98 CEPH_LOCK_UNLOCK, fl->fl_start,
97 length, 0); 99 length, 0);
98 dout("got %d on posix_lock_file, undid lock", err); 100 dout("got %d on posix_lock_file, undid lock", err);
@@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
132 length = fl->fl_end - fl->fl_start + 1; 134 length = fl->fl_end - fl->fl_start + 1;
133 135
134 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 136 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
135 file, (u64)fl->fl_pid, (u64)fl->fl_nspid, 137 file, (u64)fl->fl_pid,
138 (u64)(unsigned long)fl->fl_nspid,
136 lock_cmd, fl->fl_start, 139 lock_cmd, fl->fl_start,
137 length, wait); 140 length, wait);
138 if (!err) { 141 if (!err) {
@@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
141 ceph_lock_message(CEPH_LOCK_FLOCK, 144 ceph_lock_message(CEPH_LOCK_FLOCK,
142 CEPH_MDS_OP_SETFILELOCK, 145 CEPH_MDS_OP_SETFILELOCK,
143 file, (u64)fl->fl_pid, 146 file, (u64)fl->fl_pid,
144 (u64)fl->fl_nspid, 147 (u64)(unsigned long)fl->fl_nspid,
145 CEPH_LOCK_UNLOCK, fl->fl_start, 148 CEPH_LOCK_UNLOCK, fl->fl_start,
146 length, 0); 149 length, 0);
147 dout("got %d on flock_lock_file_wait, undid lock", err); 150 dout("got %d on flock_lock_file_wait, undid lock", err);
@@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock,
235 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); 238 cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
236 cephlock->client = cpu_to_le64(0); 239 cephlock->client = cpu_to_le64(0);
237 cephlock->pid = cpu_to_le64(lock->fl_pid); 240 cephlock->pid = cpu_to_le64(lock->fl_pid);
238 cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); 241 cephlock->pid_namespace =
242 cpu_to_le64((u64)(unsigned long)lock->fl_nspid);
239 243
240 switch (lock->fl_type) { 244 switch (lock->fl_type) {
241 case F_RDLCK: 245 case F_RDLCK:
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a75ddbf9fe37..f091b1351786 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
560 * 560 *
561 * Called under mdsc->mutex. 561 * Called under mdsc->mutex.
562 */ 562 */
563struct dentry *get_nonsnap_parent(struct dentry *dentry)
564{
565 while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
566 dentry = dentry->d_parent;
567 return dentry;
568}
569
563static int __choose_mds(struct ceph_mds_client *mdsc, 570static int __choose_mds(struct ceph_mds_client *mdsc,
564 struct ceph_mds_request *req) 571 struct ceph_mds_request *req)
565{ 572{
@@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
590 if (req->r_inode) { 597 if (req->r_inode) {
591 inode = req->r_inode; 598 inode = req->r_inode;
592 } else if (req->r_dentry) { 599 } else if (req->r_dentry) {
593 if (req->r_dentry->d_inode) { 600 struct inode *dir = req->r_dentry->d_parent->d_inode;
601
602 if (dir->i_sb != mdsc->client->sb) {
603 /* not this fs! */
604 inode = req->r_dentry->d_inode;
605 } else if (ceph_snap(dir) != CEPH_NOSNAP) {
606 /* direct snapped/virtual snapdir requests
607 * based on parent dir inode */
608 struct dentry *dn =
609 get_nonsnap_parent(req->r_dentry->d_parent);
610 inode = dn->d_inode;
611 dout("__choose_mds using nonsnap parent %p\n", inode);
612 } else if (req->r_dentry->d_inode) {
613 /* dentry target */
594 inode = req->r_dentry->d_inode; 614 inode = req->r_dentry->d_inode;
595 } else { 615 } else {
596 inode = req->r_dentry->d_parent->d_inode; 616 /* dir + name */
617 inode = dir;
597 hash = req->r_dentry->d_name.hash; 618 hash = req->r_dentry->d_name.hash;
598 is_hash = true; 619 is_hash = true;
599 } 620 }
600 } 621 }
622
601 dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, 623 dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
602 (int)hash, mode); 624 (int)hash, mode);
603 if (!inode) 625 if (!inode)
@@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session,
2208 pr_info("mds%d reconnect denied\n", session->s_mds); 2230 pr_info("mds%d reconnect denied\n", session->s_mds);
2209 remove_session_caps(session); 2231 remove_session_caps(session);
2210 wake = 1; /* for good measure */ 2232 wake = 1; /* for good measure */
2211 complete_all(&mdsc->session_close_waiters); 2233 wake_up_all(&mdsc->session_close_wq);
2212 kick_requests(mdsc, mds); 2234 kick_requests(mdsc, mds);
2213 break; 2235 break;
2214 2236
@@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2302 path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); 2324 path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
2303 if (IS_ERR(path)) { 2325 if (IS_ERR(path)) {
2304 err = PTR_ERR(path); 2326 err = PTR_ERR(path);
2305 BUG_ON(err); 2327 goto out_dput;
2306 } 2328 }
2307 } else { 2329 } else {
2308 path = NULL; 2330 path = NULL;
@@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2310 } 2332 }
2311 err = ceph_pagelist_encode_string(pagelist, path, pathlen); 2333 err = ceph_pagelist_encode_string(pagelist, path, pathlen);
2312 if (err) 2334 if (err)
2313 goto out; 2335 goto out_free;
2314 2336
2315 spin_lock(&inode->i_lock); 2337 spin_lock(&inode->i_lock);
2316 cap->seq = 0; /* reset cap seq */ 2338 cap->seq = 0; /* reset cap seq */
@@ -2354,8 +2376,9 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2354 unlock_kernel(); 2376 unlock_kernel();
2355 } 2377 }
2356 2378
2357out: 2379out_free:
2358 kfree(path); 2380 kfree(path);
2381out_dput:
2359 dput(dentry); 2382 dput(dentry);
2360 return err; 2383 return err;
2361} 2384}
@@ -2876,7 +2899,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
2876 return -ENOMEM; 2899 return -ENOMEM;
2877 2900
2878 init_completion(&mdsc->safe_umount_waiters); 2901 init_completion(&mdsc->safe_umount_waiters);
2879 init_completion(&mdsc->session_close_waiters); 2902 init_waitqueue_head(&mdsc->session_close_wq);
2880 INIT_LIST_HEAD(&mdsc->waiting_for_map); 2903 INIT_LIST_HEAD(&mdsc->waiting_for_map);
2881 mdsc->sessions = NULL; 2904 mdsc->sessions = NULL;
2882 mdsc->max_sessions = 0; 2905 mdsc->max_sessions = 0;
@@ -3021,6 +3044,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
3021 wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); 3044 wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
3022} 3045}
3023 3046
3047/*
3048 * true if all sessions are closed, or we force unmount
3049 */
3050bool done_closing_sessions(struct ceph_mds_client *mdsc)
3051{
3052 int i, n = 0;
3053
3054 if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN)
3055 return true;
3056
3057 mutex_lock(&mdsc->mutex);
3058 for (i = 0; i < mdsc->max_sessions; i++)
3059 if (mdsc->sessions[i])
3060 n++;
3061 mutex_unlock(&mdsc->mutex);
3062 return n == 0;
3063}
3024 3064
3025/* 3065/*
3026 * called after sb is ro. 3066 * called after sb is ro.
@@ -3029,45 +3069,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
3029{ 3069{
3030 struct ceph_mds_session *session; 3070 struct ceph_mds_session *session;
3031 int i; 3071 int i;
3032 int n;
3033 struct ceph_client *client = mdsc->client; 3072 struct ceph_client *client = mdsc->client;
3034 unsigned long started, timeout = client->mount_args->mount_timeout * HZ; 3073 unsigned long timeout = client->mount_args->mount_timeout * HZ;
3035 3074
3036 dout("close_sessions\n"); 3075 dout("close_sessions\n");
3037 3076
3038 mutex_lock(&mdsc->mutex);
3039
3040 /* close sessions */ 3077 /* close sessions */
3041 started = jiffies; 3078 mutex_lock(&mdsc->mutex);
3042 while (time_before(jiffies, started + timeout)) { 3079 for (i = 0; i < mdsc->max_sessions; i++) {
3043 dout("closing sessions\n"); 3080 session = __ceph_lookup_mds_session(mdsc, i);
3044 n = 0; 3081 if (!session)
3045 for (i = 0; i < mdsc->max_sessions; i++) { 3082 continue;
3046 session = __ceph_lookup_mds_session(mdsc, i);
3047 if (!session)
3048 continue;
3049 mutex_unlock(&mdsc->mutex);
3050 mutex_lock(&session->s_mutex);
3051 __close_session(mdsc, session);
3052 mutex_unlock(&session->s_mutex);
3053 ceph_put_mds_session(session);
3054 mutex_lock(&mdsc->mutex);
3055 n++;
3056 }
3057 if (n == 0)
3058 break;
3059
3060 if (client->mount_state == CEPH_MOUNT_SHUTDOWN)
3061 break;
3062
3063 dout("waiting for sessions to close\n");
3064 mutex_unlock(&mdsc->mutex); 3083 mutex_unlock(&mdsc->mutex);
3065 wait_for_completion_timeout(&mdsc->session_close_waiters, 3084 mutex_lock(&session->s_mutex);
3066 timeout); 3085 __close_session(mdsc, session);
3086 mutex_unlock(&session->s_mutex);
3087 ceph_put_mds_session(session);
3067 mutex_lock(&mdsc->mutex); 3088 mutex_lock(&mdsc->mutex);
3068 } 3089 }
3090 mutex_unlock(&mdsc->mutex);
3091
3092 dout("waiting for sessions to close\n");
3093 wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
3094 timeout);
3069 3095
3070 /* tear down remaining sessions */ 3096 /* tear down remaining sessions */
3097 mutex_lock(&mdsc->mutex);
3071 for (i = 0; i < mdsc->max_sessions; i++) { 3098 for (i = 0; i < mdsc->max_sessions; i++) {
3072 if (mdsc->sessions[i]) { 3099 if (mdsc->sessions[i]) {
3073 session = get_session(mdsc->sessions[i]); 3100 session = get_session(mdsc->sessions[i]);
@@ -3080,9 +3107,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
3080 mutex_lock(&mdsc->mutex); 3107 mutex_lock(&mdsc->mutex);
3081 } 3108 }
3082 } 3109 }
3083
3084 WARN_ON(!list_empty(&mdsc->cap_delay_list)); 3110 WARN_ON(!list_empty(&mdsc->cap_delay_list));
3085
3086 mutex_unlock(&mdsc->mutex); 3111 mutex_unlock(&mdsc->mutex);
3087 3112
3088 ceph_cleanup_empty_realms(mdsc); 3113 ceph_cleanup_empty_realms(mdsc);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ab7e89f5e344..c98267ce6d2a 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -234,7 +234,8 @@ struct ceph_mds_client {
234 struct mutex mutex; /* all nested structures */ 234 struct mutex mutex; /* all nested structures */
235 235
236 struct ceph_mdsmap *mdsmap; 236 struct ceph_mdsmap *mdsmap;
237 struct completion safe_umount_waiters, session_close_waiters; 237 struct completion safe_umount_waiters;
238 wait_queue_head_t session_close_wq;
238 struct list_head waiting_for_map; 239 struct list_head waiting_for_map;
239 240
240 struct ceph_mds_session **sessions; /* NULL for mds if no session */ 241 struct ceph_mds_session **sessions; /* NULL for mds if no session */
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index bed6391e52c7..dfced1dacbcd 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc,
661 reqhead->reassert_version = req->r_reassert_version; 661 reqhead->reassert_version = req->r_reassert_version;
662 662
663 req->r_stamp = jiffies; 663 req->r_stamp = jiffies;
664 list_move_tail(&osdc->req_lru, &req->r_req_lru_item); 664 list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
665 665
666 ceph_msg_get(req->r_request); /* send consumes a ref */ 666 ceph_msg_get(req->r_request); /* send consumes a ref */
667 ceph_con_send(&req->r_osd->o_con, req->r_request); 667 ceph_con_send(&req->r_osd->o_con, req->r_request);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index c0b26b6badba..4868b9dcac5a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -435,7 +435,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
435{ 435{
436 struct inode *inode = &ci->vfs_inode; 436 struct inode *inode = &ci->vfs_inode;
437 struct ceph_cap_snap *capsnap; 437 struct ceph_cap_snap *capsnap;
438 int used; 438 int used, dirty;
439 439
440 capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); 440 capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
441 if (!capsnap) { 441 if (!capsnap) {
@@ -445,6 +445,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
445 445
446 spin_lock(&inode->i_lock); 446 spin_lock(&inode->i_lock);
447 used = __ceph_caps_used(ci); 447 used = __ceph_caps_used(ci);
448 dirty = __ceph_caps_dirty(ci);
448 if (__ceph_have_pending_cap_snap(ci)) { 449 if (__ceph_have_pending_cap_snap(ci)) {
449 /* there is no point in queuing multiple "pending" cap_snaps, 450 /* there is no point in queuing multiple "pending" cap_snaps,
450 as no new writes are allowed to start when pending, so any 451 as no new writes are allowed to start when pending, so any
@@ -452,11 +453,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
452 cap_snap. lucky us. */ 453 cap_snap. lucky us. */
453 dout("queue_cap_snap %p already pending\n", inode); 454 dout("queue_cap_snap %p already pending\n", inode);
454 kfree(capsnap); 455 kfree(capsnap);
455 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { 456 } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) ||
457 (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
458 CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) {
456 struct ceph_snap_context *snapc = ci->i_head_snapc; 459 struct ceph_snap_context *snapc = ci->i_head_snapc;
457 460
461 dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
462 capsnap, snapc);
458 igrab(inode); 463 igrab(inode);
459 464
460 atomic_set(&capsnap->nref, 1); 465 atomic_set(&capsnap->nref, 1);
461 capsnap->ci = ci; 466 capsnap->ci = ci;
462 INIT_LIST_HEAD(&capsnap->ci_item); 467 INIT_LIST_HEAD(&capsnap->ci_item);
@@ -464,15 +469,21 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
464 469
465 capsnap->follows = snapc->seq - 1; 470 capsnap->follows = snapc->seq - 1;
466 capsnap->issued = __ceph_caps_issued(ci, NULL); 471 capsnap->issued = __ceph_caps_issued(ci, NULL);
467 capsnap->dirty = __ceph_caps_dirty(ci); 472 capsnap->dirty = dirty;
468 473
469 capsnap->mode = inode->i_mode; 474 capsnap->mode = inode->i_mode;
470 capsnap->uid = inode->i_uid; 475 capsnap->uid = inode->i_uid;
471 capsnap->gid = inode->i_gid; 476 capsnap->gid = inode->i_gid;
472 477
473 /* fixme? */ 478 if (dirty & CEPH_CAP_XATTR_EXCL) {
474 capsnap->xattr_blob = NULL; 479 __ceph_build_xattrs_blob(ci);
475 capsnap->xattr_len = 0; 480 capsnap->xattr_blob =
481 ceph_buffer_get(ci->i_xattrs.blob);
482 capsnap->xattr_version = ci->i_xattrs.version;
483 } else {
484 capsnap->xattr_blob = NULL;
485 capsnap->xattr_version = 0;
486 }
476 487
477 /* dirty page count moved from _head to this cap_snap; 488 /* dirty page count moved from _head to this cap_snap;
478 all subsequent writes page dirties occur _after_ this 489 all subsequent writes page dirties occur _after_ this
@@ -480,7 +491,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
480 capsnap->dirty_pages = ci->i_wrbuffer_ref_head; 491 capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
481 ci->i_wrbuffer_ref_head = 0; 492 ci->i_wrbuffer_ref_head = 0;
482 capsnap->context = snapc; 493 capsnap->context = snapc;
483 ci->i_head_snapc = NULL; 494 ci->i_head_snapc =
495 ceph_get_snap_context(ci->i_snap_realm->cached_context);
496 dout(" new snapc is %p\n", ci->i_head_snapc);
484 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); 497 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
485 498
486 if (used & CEPH_CAP_FILE_WR) { 499 if (used & CEPH_CAP_FILE_WR) {
@@ -539,6 +552,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
539 return 1; /* caller may want to ceph_flush_snaps */ 552 return 1; /* caller may want to ceph_flush_snaps */
540} 553}
541 554
555/*
556 * Queue cap_snaps for snap writeback for this realm and its children.
557 * Called under snap_rwsem, so realm topology won't change.
558 */
559static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
560{
561 struct ceph_inode_info *ci;
562 struct inode *lastinode = NULL;
563 struct ceph_snap_realm *child;
564
565 dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);
566
567 spin_lock(&realm->inodes_with_caps_lock);
568 list_for_each_entry(ci, &realm->inodes_with_caps,
569 i_snap_realm_item) {
570 struct inode *inode = igrab(&ci->vfs_inode);
571 if (!inode)
572 continue;
573 spin_unlock(&realm->inodes_with_caps_lock);
574 if (lastinode)
575 iput(lastinode);
576 lastinode = inode;
577 ceph_queue_cap_snap(ci);
578 spin_lock(&realm->inodes_with_caps_lock);
579 }
580 spin_unlock(&realm->inodes_with_caps_lock);
581 if (lastinode)
582 iput(lastinode);
583
584 dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
585 list_for_each_entry(child, &realm->children, child_item)
586 queue_realm_cap_snaps(child);
587
588 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
589}
542 590
543/* 591/*
544 * Parse and apply a snapblob "snap trace" from the MDS. This specifies 592 * Parse and apply a snapblob "snap trace" from the MDS. This specifies
@@ -589,29 +637,8 @@ more:
589 * 637 *
590 * ...unless it's a snap deletion! 638 * ...unless it's a snap deletion!
591 */ 639 */
592 if (!deletion) { 640 if (!deletion)
593 struct ceph_inode_info *ci; 641 queue_realm_cap_snaps(realm);
594 struct inode *lastinode = NULL;
595
596 spin_lock(&realm->inodes_with_caps_lock);
597 list_for_each_entry(ci, &realm->inodes_with_caps,
598 i_snap_realm_item) {
599 struct inode *inode = igrab(&ci->vfs_inode);
600 if (!inode)
601 continue;
602 spin_unlock(&realm->inodes_with_caps_lock);
603 if (lastinode)
604 iput(lastinode);
605 lastinode = inode;
606 ceph_queue_cap_snap(ci);
607 spin_lock(&realm->inodes_with_caps_lock);
608 }
609 spin_unlock(&realm->inodes_with_caps_lock);
610 if (lastinode)
611 iput(lastinode);
612 dout("update_snap_trace cap_snaps queued\n");
613 }
614
615 } else { 642 } else {
616 dout("update_snap_trace %llx %p seq %lld unchanged\n", 643 dout("update_snap_trace %llx %p seq %lld unchanged\n",
617 realm->ino, realm, realm->seq); 644 realm->ino, realm, realm->seq);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2482d696f0de..c33897ae5725 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -216,8 +216,7 @@ struct ceph_cap_snap {
216 uid_t uid; 216 uid_t uid;
217 gid_t gid; 217 gid_t gid;
218 218
219 void *xattr_blob; 219 struct ceph_buffer *xattr_blob;
220 int xattr_len;
221 u64 xattr_version; 220 u64 xattr_version;
222 221
223 u64 size; 222 u64 size;
@@ -229,8 +228,11 @@ struct ceph_cap_snap {
229 228
230static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) 229static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
231{ 230{
232 if (atomic_dec_and_test(&capsnap->nref)) 231 if (atomic_dec_and_test(&capsnap->nref)) {
232 if (capsnap->xattr_blob)
233 ceph_buffer_put(capsnap->xattr_blob);
233 kfree(capsnap); 234 kfree(capsnap);
235 }
234} 236}
235 237
236/* 238/*
@@ -342,7 +344,8 @@ struct ceph_inode_info {
342 unsigned i_cap_exporting_issued; 344 unsigned i_cap_exporting_issued;
343 struct ceph_cap_reservation i_cap_migration_resv; 345 struct ceph_cap_reservation i_cap_migration_resv;
344 struct list_head i_cap_snaps; /* snapped state pending flush to mds */ 346 struct list_head i_cap_snaps; /* snapped state pending flush to mds */
345 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */ 347 struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
348 dirty|flushing caps */
346 unsigned i_snap_caps; /* cap bits for snapped files */ 349 unsigned i_snap_caps; /* cap bits for snapped files */
347 350
348 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ 351 int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 097a2654c00f..9578af610b73 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
485 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; 485 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
486 ci->i_xattrs.prealloc_blob = NULL; 486 ci->i_xattrs.prealloc_blob = NULL;
487 ci->i_xattrs.dirty = false; 487 ci->i_xattrs.dirty = false;
488 ci->i_xattrs.version++;
488 } 489 }
489} 490}
490 491
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 650638275a6f..7fe6b52df507 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -30,6 +30,8 @@
30 * This is a compressed table of upper and lower case conversion. 30 * This is a compressed table of upper and lower case conversion.
31 * 31 *
32 */ 32 */
33#ifndef _CIFS_UNICODE_H
34#define _CIFS_UNICODE_H
33 35
34#include <asm/byteorder.h> 36#include <asm/byteorder.h>
35#include <linux/types.h> 37#include <linux/types.h>
@@ -67,8 +69,8 @@ extern const struct UniCaseRange CifsUniUpperRange[];
67#endif /* UNIUPR_NOUPPER */ 69#endif /* UNIUPR_NOUPPER */
68 70
69#ifndef UNIUPR_NOLOWER 71#ifndef UNIUPR_NOLOWER
70extern signed char UniLowerTable[512]; 72extern signed char CifsUniLowerTable[512];
71extern struct UniCaseRange UniLowerRange[]; 73extern const struct UniCaseRange CifsUniLowerRange[];
72#endif /* UNIUPR_NOLOWER */ 74#endif /* UNIUPR_NOLOWER */
73 75
74#ifdef __KERNEL__ 76#ifdef __KERNEL__
@@ -337,15 +339,15 @@ UniStrupr(register wchar_t *upin)
337 * UniTolower: Convert a unicode character to lower case 339 * UniTolower: Convert a unicode character to lower case
338 */ 340 */
339static inline wchar_t 341static inline wchar_t
340UniTolower(wchar_t uc) 342UniTolower(register wchar_t uc)
341{ 343{
342 register struct UniCaseRange *rp; 344 register const struct UniCaseRange *rp;
343 345
344 if (uc < sizeof(UniLowerTable)) { 346 if (uc < sizeof(CifsUniLowerTable)) {
345 /* Latin characters */ 347 /* Latin characters */
346 return uc + UniLowerTable[uc]; /* Use base tables */ 348 return uc + CifsUniLowerTable[uc]; /* Use base tables */
347 } else { 349 } else {
348 rp = UniLowerRange; /* Use range tables */ 350 rp = CifsUniLowerRange; /* Use range tables */
349 while (rp->start) { 351 while (rp->start) {
350 if (uc < rp->start) /* Before start of range */ 352 if (uc < rp->start) /* Before start of range */
351 return uc; /* Uppercase = input */ 353 return uc; /* Uppercase = input */
@@ -374,3 +376,5 @@ UniStrlwr(register wchar_t *upin)
374} 376}
375 377
376#endif 378#endif
379
380#endif /* _CIFS_UNICODE_H */
diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h
index 18a9d978e519..0ac7c5a8633a 100644
--- a/fs/cifs/cifs_uniupr.h
+++ b/fs/cifs/cifs_uniupr.h
@@ -140,7 +140,7 @@ const struct UniCaseRange CifsUniUpperRange[] = {
140/* 140/*
141 * Latin lower case 141 * Latin lower case
142 */ 142 */
143static signed char CifsUniLowerTable[512] = { 143signed char CifsUniLowerTable[512] = {
144 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ 144 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
145 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ 145 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
146 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ 146 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
@@ -242,12 +242,12 @@ static signed char UniCaseRangeLff20[27] = {
242/* 242/*
243 * Lower Case Range 243 * Lower Case Range
244 */ 244 */
245static const struct UniCaseRange CifsUniLowerRange[] = { 245const struct UniCaseRange CifsUniLowerRange[] = {
246 0x0380, 0x03ab, UniCaseRangeL0380, 246 {0x0380, 0x03ab, UniCaseRangeL0380},
247 0x0400, 0x042f, UniCaseRangeL0400, 247 {0x0400, 0x042f, UniCaseRangeL0400},
248 0x0490, 0x04cb, UniCaseRangeL0490, 248 {0x0490, 0x04cb, UniCaseRangeL0490},
249 0x1e00, 0x1ff7, UniCaseRangeL1e00, 249 {0x1e00, 0x1ff7, UniCaseRangeL1e00},
250 0xff20, 0xff3a, UniCaseRangeLff20, 250 {0xff20, 0xff3a, UniCaseRangeLff20},
251 0, 0, 0 251 {0}
252}; 252};
253#endif 253#endif
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 847628dfdc44..35042d8f7338 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -223,63 +223,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
223 return 0; 223 return 0;
224} 224}
225 225
226int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses,
227 const struct nls_table *nls_info)
228{
229 char temp_hash[16];
230 struct HMACMD5Context ctx;
231 char *ucase_buf;
232 __le16 *unicode_buf;
233 unsigned int i, user_name_len, dom_name_len;
234
235 if (ses == NULL)
236 return -EINVAL;
237
238 E_md4hash(ses->password, temp_hash);
239
240 hmac_md5_init_limK_to_64(temp_hash, 16, &ctx);
241 user_name_len = strlen(ses->userName);
242 if (user_name_len > MAX_USERNAME_SIZE)
243 return -EINVAL;
244 if (ses->domainName == NULL)
245 return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
246 dom_name_len = strlen(ses->domainName);
247 if (dom_name_len > MAX_USERNAME_SIZE)
248 return -EINVAL;
249
250 ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL);
251 if (ucase_buf == NULL)
252 return -ENOMEM;
253 unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL);
254 if (unicode_buf == NULL) {
255 kfree(ucase_buf);
256 return -ENOMEM;
257 }
258
259 for (i = 0; i < user_name_len; i++)
260 ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]];
261 ucase_buf[i] = 0;
262 user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf,
263 MAX_USERNAME_SIZE*2, nls_info);
264 unicode_buf[user_name_len] = 0;
265 user_name_len++;
266
267 for (i = 0; i < dom_name_len; i++)
268 ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]];
269 ucase_buf[i] = 0;
270 dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf,
271 MAX_USERNAME_SIZE*2, nls_info);
272
273 unicode_buf[user_name_len + dom_name_len] = 0;
274 hmac_md5_update((const unsigned char *) unicode_buf,
275 (user_name_len+dom_name_len)*2, &ctx);
276
277 hmac_md5_final(ses->server->ntlmv2_hash, &ctx);
278 kfree(ucase_buf);
279 kfree(unicode_buf);
280 return 0;
281}
282
283#ifdef CONFIG_CIFS_WEAK_PW_HASH 226#ifdef CONFIG_CIFS_WEAK_PW_HASH
284void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, 227void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
285 char *lnm_session_key) 228 char *lnm_session_key)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1f5450814087..1d60c655e3e0 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
87extern int decode_negTokenInit(unsigned char *security_blob, int length, 87extern int decode_negTokenInit(unsigned char *security_blob, int length,
88 struct TCP_Server_Info *server); 88 struct TCP_Server_Info *server);
89extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); 89extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
90extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
90extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, 91extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
91 unsigned short int port); 92 const unsigned short int port);
92extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 93extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
93extern void header_assemble(struct smb_hdr *, char /* command */ , 94extern void header_assemble(struct smb_hdr *, char /* command */ ,
94 const struct cifsTconInfo *, int /* length of 95 const struct cifsTconInfo *, int /* length of
@@ -365,8 +366,6 @@ extern int cifs_verify_signature(struct smb_hdr *,
365 __u32 expected_sequence_number); 366 __u32 expected_sequence_number);
366extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, 367extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
367 const char *pass); 368 const char *pass);
368extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
369 const struct nls_table *);
370extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); 369extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
371extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 370extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
372 const struct nls_table *); 371 const struct nls_table *);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 95c2ea67edfb..67dad54fbfa1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -400,7 +400,9 @@ incomplete_rcv:
400 cFYI(1, "call to reconnect done"); 400 cFYI(1, "call to reconnect done");
401 csocket = server->ssocket; 401 csocket = server->ssocket;
402 continue; 402 continue;
403 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { 403 } else if (length == -ERESTARTSYS ||
404 length == -EAGAIN ||
405 length == -EINTR) {
404 msleep(1); /* minimum sleep to prevent looping 406 msleep(1); /* minimum sleep to prevent looping
405 allowing socket to clear and app threads to set 407 allowing socket to clear and app threads to set
406 tcpStatus CifsNeedReconnect if server hung */ 408 tcpStatus CifsNeedReconnect if server hung */
@@ -414,18 +416,6 @@ incomplete_rcv:
414 } else 416 } else
415 continue; 417 continue;
416 } else if (length <= 0) { 418 } else if (length <= 0) {
417 if (server->tcpStatus == CifsNew) {
418 cFYI(1, "tcp session abend after SMBnegprot");
419 /* some servers kill the TCP session rather than
420 returning an SMB negprot error, in which
421 case reconnecting here is not going to help,
422 and so simply return error to mount */
423 break;
424 }
425 if (!try_to_freeze() && (length == -EINTR)) {
426 cFYI(1, "cifsd thread killed");
427 break;
428 }
429 cFYI(1, "Reconnect after unexpected peek error %d", 419 cFYI(1, "Reconnect after unexpected peek error %d",
430 length); 420 length);
431 cifs_reconnect(server); 421 cifs_reconnect(server);
@@ -466,27 +456,19 @@ incomplete_rcv:
466 an error on SMB negprot response */ 456 an error on SMB negprot response */
467 cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", 457 cFYI(1, "Negative RFC1002 Session Response Error 0x%x)",
468 pdu_length); 458 pdu_length);
469 if (server->tcpStatus == CifsNew) { 459 /* give server a second to clean up */
470 /* if nack on negprot (rather than 460 msleep(1000);
471 ret of smb negprot error) reconnecting 461 /* always try 445 first on reconnect since we get NACK
472 not going to help, ret error to mount */ 462 * on some if we ever connected to port 139 (the NACK
473 break; 463 * is since we do not begin with RFC1001 session
474 } else { 464 * initialize frame)
475 /* give server a second to 465 */
476 clean up before reconnect attempt */ 466 cifs_set_port((struct sockaddr *)
477 msleep(1000); 467 &server->addr.sockAddr, CIFS_PORT);
478 /* always try 445 first on reconnect 468 cifs_reconnect(server);
479 since we get NACK on some if we ever 469 csocket = server->ssocket;
480 connected to port 139 (the NACK is 470 wake_up(&server->response_q);
481 since we do not begin with RFC1001 471 continue;
482 session initialize frame) */
483 server->addr.sockAddr.sin_port =
484 htons(CIFS_PORT);
485 cifs_reconnect(server);
486 csocket = server->ssocket;
487 wake_up(&server->response_q);
488 continue;
489 }
490 } else if (temp != (char) 0) { 472 } else if (temp != (char) 0) {
491 cERROR(1, "Unknown RFC 1002 frame"); 473 cERROR(1, "Unknown RFC 1002 frame");
492 cifs_dump_mem(" Received Data: ", (char *)smb_buffer, 474 cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
@@ -522,8 +504,7 @@ incomplete_rcv:
522 total_read += length) { 504 total_read += length) {
523 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, 505 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
524 pdu_length - total_read, 0); 506 pdu_length - total_read, 0);
525 if ((server->tcpStatus == CifsExiting) || 507 if (server->tcpStatus == CifsExiting) {
526 (length == -EINTR)) {
527 /* then will exit */ 508 /* then will exit */
528 reconnect = 2; 509 reconnect = 2;
529 break; 510 break;
@@ -534,8 +515,9 @@ incomplete_rcv:
534 /* Now we will reread sock */ 515 /* Now we will reread sock */
535 reconnect = 1; 516 reconnect = 1;
536 break; 517 break;
537 } else if ((length == -ERESTARTSYS) || 518 } else if (length == -ERESTARTSYS ||
538 (length == -EAGAIN)) { 519 length == -EAGAIN ||
520 length == -EINTR) {
539 msleep(1); /* minimum sleep to prevent looping, 521 msleep(1); /* minimum sleep to prevent looping,
540 allowing socket to clear and app 522 allowing socket to clear and app
541 threads to set tcpStatus 523 threads to set tcpStatus
@@ -1673,7 +1655,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1673 MAX_USERNAME_SIZE)) 1655 MAX_USERNAME_SIZE))
1674 continue; 1656 continue;
1675 if (strlen(vol->username) != 0 && 1657 if (strlen(vol->username) != 0 &&
1676 strncmp(ses->password, vol->password, 1658 ses->password != NULL &&
1659 strncmp(ses->password,
1660 vol->password ? vol->password : "",
1677 MAX_PASSWORD_SIZE)) 1661 MAX_PASSWORD_SIZE))
1678 continue; 1662 continue;
1679 } 1663 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 578d88c5b46e..f9ed0751cc12 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -305,8 +305,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
305 full_path = build_path_from_dentry(direntry); 305 full_path = build_path_from_dentry(direntry);
306 if (full_path == NULL) { 306 if (full_path == NULL) {
307 rc = -ENOMEM; 307 rc = -ENOMEM;
308 FreeXid(xid); 308 goto cifs_create_out;
309 return rc;
310 } 309 }
311 310
312 if (oplockEnabled) 311 if (oplockEnabled)
@@ -365,9 +364,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
365 364
366 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 365 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
367 if (buf == NULL) { 366 if (buf == NULL) {
368 kfree(full_path); 367 rc = -ENOMEM;
369 FreeXid(xid); 368 goto cifs_create_out;
370 return -ENOMEM;
371 } 369 }
372 370
373 /* 371 /*
@@ -496,6 +494,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
496 struct cifsTconInfo *pTcon; 494 struct cifsTconInfo *pTcon;
497 char *full_path = NULL; 495 char *full_path = NULL;
498 struct inode *newinode = NULL; 496 struct inode *newinode = NULL;
497 int oplock = 0;
498 u16 fileHandle;
499 FILE_ALL_INFO *buf = NULL;
500 unsigned int bytes_written;
501 struct win_dev *pdev;
499 502
500 if (!old_valid_dev(device_number)) 503 if (!old_valid_dev(device_number))
501 return -EINVAL; 504 return -EINVAL;
@@ -506,9 +509,12 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
506 pTcon = cifs_sb->tcon; 509 pTcon = cifs_sb->tcon;
507 510
508 full_path = build_path_from_dentry(direntry); 511 full_path = build_path_from_dentry(direntry);
509 if (full_path == NULL) 512 if (full_path == NULL) {
510 rc = -ENOMEM; 513 rc = -ENOMEM;
511 else if (pTcon->unix_ext) { 514 goto mknod_out;
515 }
516
517 if (pTcon->unix_ext) {
512 struct cifs_unix_set_info_args args = { 518 struct cifs_unix_set_info_args args = {
513 .mode = mode & ~current_umask(), 519 .mode = mode & ~current_umask(),
514 .ctime = NO_CHANGE_64, 520 .ctime = NO_CHANGE_64,
@@ -527,87 +533,78 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
527 cifs_sb->local_nls, 533 cifs_sb->local_nls,
528 cifs_sb->mnt_cifs_flags & 534 cifs_sb->mnt_cifs_flags &
529 CIFS_MOUNT_MAP_SPECIAL_CHR); 535 CIFS_MOUNT_MAP_SPECIAL_CHR);
536 if (rc)
537 goto mknod_out;
530 538
531 if (!rc) { 539 rc = cifs_get_inode_info_unix(&newinode, full_path,
532 rc = cifs_get_inode_info_unix(&newinode, full_path,
533 inode->i_sb, xid); 540 inode->i_sb, xid);
534 if (pTcon->nocase) 541 if (pTcon->nocase)
535 direntry->d_op = &cifs_ci_dentry_ops; 542 direntry->d_op = &cifs_ci_dentry_ops;
536 else 543 else
537 direntry->d_op = &cifs_dentry_ops; 544 direntry->d_op = &cifs_dentry_ops;
538 if (rc == 0)
539 d_instantiate(direntry, newinode);
540 }
541 } else {
542 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
543 int oplock = 0;
544 u16 fileHandle;
545 FILE_ALL_INFO *buf;
546 545
547 cFYI(1, "sfu compat create special file"); 546 if (rc == 0)
547 d_instantiate(direntry, newinode);
548 goto mknod_out;
549 }
548 550
549 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 551 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
550 if (buf == NULL) { 552 goto mknod_out;
551 kfree(full_path);
552 rc = -ENOMEM;
553 FreeXid(xid);
554 return rc;
555 }
556 553
557 rc = CIFSSMBOpen(xid, pTcon, full_path, 554
558 FILE_CREATE, /* fail if exists */ 555 cFYI(1, "sfu compat create special file");
559 GENERIC_WRITE /* BB would 556
560 WRITE_OWNER | WRITE_DAC be better? */, 557 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
561 /* Create a file and set the 558 if (buf == NULL) {
562 file attribute to SYSTEM */ 559 kfree(full_path);
563 CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, 560 rc = -ENOMEM;
564 &fileHandle, &oplock, buf, 561 FreeXid(xid);
565 cifs_sb->local_nls, 562 return rc;
566 cifs_sb->mnt_cifs_flags &
567 CIFS_MOUNT_MAP_SPECIAL_CHR);
568
569 /* BB FIXME - add handling for backlevel servers
570 which need legacy open and check for all
571 calls to SMBOpen for fallback to SMBLeagcyOpen */
572 if (!rc) {
573 /* BB Do not bother to decode buf since no
574 local inode yet to put timestamps in,
575 but we can reuse it safely */
576 unsigned int bytes_written;
577 struct win_dev *pdev;
578 pdev = (struct win_dev *)buf;
579 if (S_ISCHR(mode)) {
580 memcpy(pdev->type, "IntxCHR", 8);
581 pdev->major =
582 cpu_to_le64(MAJOR(device_number));
583 pdev->minor =
584 cpu_to_le64(MINOR(device_number));
585 rc = CIFSSMBWrite(xid, pTcon,
586 fileHandle,
587 sizeof(struct win_dev),
588 0, &bytes_written, (char *)pdev,
589 NULL, 0);
590 } else if (S_ISBLK(mode)) {
591 memcpy(pdev->type, "IntxBLK", 8);
592 pdev->major =
593 cpu_to_le64(MAJOR(device_number));
594 pdev->minor =
595 cpu_to_le64(MINOR(device_number));
596 rc = CIFSSMBWrite(xid, pTcon,
597 fileHandle,
598 sizeof(struct win_dev),
599 0, &bytes_written, (char *)pdev,
600 NULL, 0);
601 } /* else if(S_ISFIFO */
602 CIFSSMBClose(xid, pTcon, fileHandle);
603 d_drop(direntry);
604 }
605 kfree(buf);
606 /* add code here to set EAs */
607 }
608 } 563 }
609 564
565 /* FIXME: would WRITE_OWNER | WRITE_DAC be better? */
566 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_CREATE,
567 GENERIC_WRITE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL,
568 &fileHandle, &oplock, buf, cifs_sb->local_nls,
569 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
570 if (rc)
571 goto mknod_out;
572
573 /* BB Do not bother to decode buf since no local inode yet to put
574 * timestamps in, but we can reuse it safely */
575
576 pdev = (struct win_dev *)buf;
577 if (S_ISCHR(mode)) {
578 memcpy(pdev->type, "IntxCHR", 8);
579 pdev->major =
580 cpu_to_le64(MAJOR(device_number));
581 pdev->minor =
582 cpu_to_le64(MINOR(device_number));
583 rc = CIFSSMBWrite(xid, pTcon,
584 fileHandle,
585 sizeof(struct win_dev),
586 0, &bytes_written, (char *)pdev,
587 NULL, 0);
588 } else if (S_ISBLK(mode)) {
589 memcpy(pdev->type, "IntxBLK", 8);
590 pdev->major =
591 cpu_to_le64(MAJOR(device_number));
592 pdev->minor =
593 cpu_to_le64(MINOR(device_number));
594 rc = CIFSSMBWrite(xid, pTcon,
595 fileHandle,
596 sizeof(struct win_dev),
597 0, &bytes_written, (char *)pdev,
598 NULL, 0);
599 } /* else if (S_ISFIFO) */
600 CIFSSMBClose(xid, pTcon, fileHandle);
601 d_drop(direntry);
602
603 /* FIXME: add code here to set EAs */
604
605mknod_out:
610 kfree(full_path); 606 kfree(full_path);
607 kfree(buf);
611 FreeXid(xid); 608 FreeXid(xid);
612 return rc; 609 return rc;
613} 610}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index db11fdef0e92..de748c652d11 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -242,8 +242,7 @@ int cifs_open(struct inode *inode, struct file *file)
242 full_path = build_path_from_dentry(file->f_path.dentry); 242 full_path = build_path_from_dentry(file->f_path.dentry);
243 if (full_path == NULL) { 243 if (full_path == NULL) {
244 rc = -ENOMEM; 244 rc = -ENOMEM;
245 FreeXid(xid); 245 goto out;
246 return rc;
247 } 246 }
248 247
249 cFYI(1, "inode = 0x%p file flags are 0x%x for %s", 248 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4bc47e5b5f29..93f77d438d3c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -834,7 +834,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
834 xid, NULL); 834 xid, NULL);
835 835
836 if (!inode) 836 if (!inode)
837 return ERR_PTR(-ENOMEM); 837 return ERR_PTR(rc);
838 838
839#ifdef CONFIG_CIFS_FSCACHE 839#ifdef CONFIG_CIFS_FSCACHE
840 /* populate tcon->resource_id */ 840 /* populate tcon->resource_id */
@@ -1462,29 +1462,18 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1462{ 1462{
1463 char *fromName = NULL; 1463 char *fromName = NULL;
1464 char *toName = NULL; 1464 char *toName = NULL;
1465 struct cifs_sb_info *cifs_sb_source; 1465 struct cifs_sb_info *cifs_sb;
1466 struct cifs_sb_info *cifs_sb_target;
1467 struct cifsTconInfo *tcon; 1466 struct cifsTconInfo *tcon;
1468 FILE_UNIX_BASIC_INFO *info_buf_source = NULL; 1467 FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
1469 FILE_UNIX_BASIC_INFO *info_buf_target; 1468 FILE_UNIX_BASIC_INFO *info_buf_target;
1470 int xid, rc, tmprc; 1469 int xid, rc, tmprc;
1471 1470
1472 cifs_sb_target = CIFS_SB(target_dir->i_sb); 1471 cifs_sb = CIFS_SB(source_dir->i_sb);
1473 cifs_sb_source = CIFS_SB(source_dir->i_sb); 1472 tcon = cifs_sb->tcon;
1474 tcon = cifs_sb_source->tcon;
1475 1473
1476 xid = GetXid(); 1474 xid = GetXid();
1477 1475
1478 /* 1476 /*
1479 * BB: this might be allowed if same server, but different share.
1480 * Consider adding support for this
1481 */
1482 if (tcon != cifs_sb_target->tcon) {
1483 rc = -EXDEV;
1484 goto cifs_rename_exit;
1485 }
1486
1487 /*
1488 * we already have the rename sem so we do not need to 1477 * we already have the rename sem so we do not need to
1489 * grab it again here to protect the path integrity 1478 * grab it again here to protect the path integrity
1490 */ 1479 */
@@ -1519,17 +1508,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1519 info_buf_target = info_buf_source + 1; 1508 info_buf_target = info_buf_source + 1;
1520 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, 1509 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
1521 info_buf_source, 1510 info_buf_source,
1522 cifs_sb_source->local_nls, 1511 cifs_sb->local_nls,
1523 cifs_sb_source->mnt_cifs_flags & 1512 cifs_sb->mnt_cifs_flags &
1524 CIFS_MOUNT_MAP_SPECIAL_CHR); 1513 CIFS_MOUNT_MAP_SPECIAL_CHR);
1525 if (tmprc != 0) 1514 if (tmprc != 0)
1526 goto unlink_target; 1515 goto unlink_target;
1527 1516
1528 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, 1517 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName,
1529 toName, info_buf_target, 1518 info_buf_target,
1530 cifs_sb_target->local_nls, 1519 cifs_sb->local_nls,
1531 /* remap based on source sb */ 1520 cifs_sb->mnt_cifs_flags &
1532 cifs_sb_source->mnt_cifs_flags &
1533 CIFS_MOUNT_MAP_SPECIAL_CHR); 1521 CIFS_MOUNT_MAP_SPECIAL_CHR);
1534 1522
1535 if (tmprc == 0 && (info_buf_source->UniqueId == 1523 if (tmprc == 0 && (info_buf_source->UniqueId ==
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index f97851119e6c..9aad47a2d62f 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
206} 206}
207 207
208int 208int
209cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, 209cifs_set_port(struct sockaddr *addr, const unsigned short int port)
210 const unsigned short int port)
211{ 210{
212 if (!cifs_convert_address(dst, src, len)) 211 switch (addr->sa_family) {
213 return 0;
214
215 switch (dst->sa_family) {
216 case AF_INET: 212 case AF_INET:
217 ((struct sockaddr_in *)dst)->sin_port = htons(port); 213 ((struct sockaddr_in *)addr)->sin_port = htons(port);
218 break; 214 break;
219 case AF_INET6: 215 case AF_INET6:
220 ((struct sockaddr_in6 *)dst)->sin6_port = htons(port); 216 ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
221 break; 217 break;
222 default: 218 default:
223 return 0; 219 return 0;
224 } 220 }
225
226 return 1; 221 return 1;
227} 222}
228 223
224int
225cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
226 const unsigned short int port)
227{
228 if (!cifs_convert_address(dst, src, len))
229 return 0;
230 return cifs_set_port(dst, port);
231}
232
229/***************************************************************************** 233/*****************************************************************************
230convert a NT status code to a dos class/code 234convert a NT status code to a dos class/code
231 *****************************************************************************/ 235 *****************************************************************************/
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 51f270b479b6..48d74c7391d1 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio)
634 int ret = 0; 634 int ret = 0;
635 635
636 if (dio->bio) { 636 if (dio->bio) {
637 loff_t cur_offset = dio->block_in_file << dio->blkbits; 637 loff_t cur_offset = dio->cur_page_fs_offset;
638 loff_t bio_next_offset = dio->logical_offset_in_bio + 638 loff_t bio_next_offset = dio->logical_offset_in_bio +
639 dio->bio->bi_size; 639 dio->bio->bi_size;
640 640
@@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio)
659 * Submit now if the underlying fs is about to perform a 659 * Submit now if the underlying fs is about to perform a
660 * metadata read 660 * metadata read
661 */ 661 */
662 if (dio->boundary) 662 else if (dio->boundary)
663 dio_bio_submit(dio); 663 dio_bio_submit(dio);
664 } 664 }
665 665
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a2e3b562e65d..cbadc1bee6e7 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1793,7 +1793,7 @@ struct kmem_cache *ecryptfs_key_tfm_cache;
1793static struct list_head key_tfm_list; 1793static struct list_head key_tfm_list;
1794struct mutex key_tfm_list_mutex; 1794struct mutex key_tfm_list_mutex;
1795 1795
1796int ecryptfs_init_crypto(void) 1796int __init ecryptfs_init_crypto(void)
1797{ 1797{
1798 mutex_init(&key_tfm_list_mutex); 1798 mutex_init(&key_tfm_list_mutex);
1799 INIT_LIST_HEAD(&key_tfm_list); 1799 INIT_LIST_HEAD(&key_tfm_list);
@@ -2169,7 +2169,6 @@ int ecryptfs_encrypt_and_encode_filename(
2169 (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 2169 (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE
2170 + encoded_name_no_prefix_size); 2170 + encoded_name_no_prefix_size);
2171 (*encoded_name)[(*encoded_name_size)] = '\0'; 2171 (*encoded_name)[(*encoded_name_size)] = '\0';
2172 (*encoded_name_size)++;
2173 } else { 2172 } else {
2174 rc = -EOPNOTSUPP; 2173 rc = -EOPNOTSUPP;
2175 } 2174 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 6c55113e7222..3fbc94203380 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -349,7 +349,7 @@ out:
349 349
350/** 350/**
351 * ecryptfs_new_lower_dentry 351 * ecryptfs_new_lower_dentry
352 * @ename: The name of the new dentry. 352 * @name: The name of the new dentry.
353 * @lower_dir_dentry: Parent directory of the new dentry. 353 * @lower_dir_dentry: Parent directory of the new dentry.
354 * @nd: nameidata from last lookup. 354 * @nd: nameidata from last lookup.
355 * 355 *
@@ -386,20 +386,19 @@ ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
386 * ecryptfs_lookup_one_lower 386 * ecryptfs_lookup_one_lower
387 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up 387 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
388 * @lower_dir_dentry: lower parent directory 388 * @lower_dir_dentry: lower parent directory
389 * @name: lower file name
389 * 390 *
390 * Get the lower dentry from vfs. If lower dentry does not exist yet, 391 * Get the lower dentry from vfs. If lower dentry does not exist yet,
391 * create it. 392 * create it.
392 */ 393 */
393static struct dentry * 394static struct dentry *
394ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, 395ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
395 struct dentry *lower_dir_dentry) 396 struct dentry *lower_dir_dentry, struct qstr *name)
396{ 397{
397 struct nameidata nd; 398 struct nameidata nd;
398 struct vfsmount *lower_mnt; 399 struct vfsmount *lower_mnt;
399 struct qstr *name;
400 int err; 400 int err;
401 401
402 name = &ecryptfs_dentry->d_name;
403 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( 402 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
404 ecryptfs_dentry->d_parent)); 403 ecryptfs_dentry->d_parent));
405 err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); 404 err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
@@ -434,6 +433,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
434 size_t encrypted_and_encoded_name_size; 433 size_t encrypted_and_encoded_name_size;
435 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; 434 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
436 struct dentry *lower_dir_dentry, *lower_dentry; 435 struct dentry *lower_dir_dentry, *lower_dentry;
436 struct qstr lower_name;
437 int rc = 0; 437 int rc = 0;
438 438
439 ecryptfs_dentry->d_op = &ecryptfs_dops; 439 ecryptfs_dentry->d_op = &ecryptfs_dops;
@@ -444,9 +444,17 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
444 goto out_d_drop; 444 goto out_d_drop;
445 } 445 }
446 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 446 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
447 447 lower_name.name = ecryptfs_dentry->d_name.name;
448 lower_name.len = ecryptfs_dentry->d_name.len;
449 lower_name.hash = ecryptfs_dentry->d_name.hash;
450 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
451 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
452 &lower_name);
453 if (rc < 0)
454 goto out_d_drop;
455 }
448 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, 456 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
449 lower_dir_dentry); 457 lower_dir_dentry, &lower_name);
450 if (IS_ERR(lower_dentry)) { 458 if (IS_ERR(lower_dentry)) {
451 rc = PTR_ERR(lower_dentry); 459 rc = PTR_ERR(lower_dentry);
452 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 460 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
@@ -471,8 +479,17 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
471 "filename; rc = [%d]\n", __func__, rc); 479 "filename; rc = [%d]\n", __func__, rc);
472 goto out_d_drop; 480 goto out_d_drop;
473 } 481 }
482 lower_name.name = encrypted_and_encoded_name;
483 lower_name.len = encrypted_and_encoded_name_size;
484 lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
485 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
486 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
487 &lower_name);
488 if (rc < 0)
489 goto out_d_drop;
490 }
474 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, 491 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
475 lower_dir_dentry); 492 lower_dir_dentry, &lower_name);
476 if (IS_ERR(lower_dentry)) { 493 if (IS_ERR(lower_dentry)) {
477 rc = PTR_ERR(lower_dentry); 494 rc = PTR_ERR(lower_dentry);
478 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 495 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 89c5476506ef..73811cfa2ea4 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -515,6 +515,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
515 if (!s) { 515 if (!s) {
516 printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " 516 printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
517 "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); 517 "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
518 rc = -ENOMEM;
518 goto out; 519 goto out;
519 } 520 }
520 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 521 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -806,6 +807,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
806 if (!s) { 807 if (!s) {
807 printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc " 808 printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
808 "[%zd] bytes of kernel memory\n", __func__, sizeof(*s)); 809 "[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
810 rc = -ENOMEM;
809 goto out; 811 goto out;
810 } 812 }
811 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 813 s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index d8c3a373aafa..0851ab6980f5 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -86,7 +86,7 @@ out:
86 return 0; 86 return 0;
87} 87}
88 88
89int ecryptfs_init_kthread(void) 89int __init ecryptfs_init_kthread(void)
90{ 90{
91 int rc = 0; 91 int rc = 0;
92 92
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index bcb68c0cb1f0..ab2248090515 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -473,7 +473,7 @@ sleep:
473 return rc; 473 return rc;
474} 474}
475 475
476int ecryptfs_init_messaging(void) 476int __init ecryptfs_init_messaging(void)
477{ 477{
478 int i; 478 int i;
479 int rc = 0; 479 int rc = 0;
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 3745f612bcd4..00208c3d7e92 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -500,7 +500,7 @@ static struct miscdevice ecryptfs_miscdev = {
500 * 500 *
501 * Returns zero on success; non-zero otherwise 501 * Returns zero on success; non-zero otherwise
502 */ 502 */
503int ecryptfs_init_ecryptfs_miscdev(void) 503int __init ecryptfs_init_ecryptfs_miscdev(void)
504{ 504{
505 int rc; 505 int rc;
506 506
diff --git a/fs/exec.c b/fs/exec.c
index 2d9455282744..828dd2461d6b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max)
376 argv++; 376 argv++;
377 if (i++ >= max) 377 if (i++ >= max)
378 return -E2BIG; 378 return -E2BIG;
379
380 if (fatal_signal_pending(current))
381 return -ERESTARTNOHAND;
379 cond_resched(); 382 cond_resched();
380 } 383 }
381 } 384 }
@@ -419,6 +422,12 @@ static int copy_strings(int argc, const char __user *const __user *argv,
419 while (len > 0) { 422 while (len > 0) {
420 int offset, bytes_to_copy; 423 int offset, bytes_to_copy;
421 424
425 if (fatal_signal_pending(current)) {
426 ret = -ERESTARTNOHAND;
427 goto out;
428 }
429 cond_resched();
430
422 offset = pos % PAGE_SIZE; 431 offset = pos % PAGE_SIZE;
423 if (offset == 0) 432 if (offset == 0)
424 offset = PAGE_SIZE; 433 offset = PAGE_SIZE;
@@ -594,6 +603,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
594#else 603#else
595 stack_top = arch_align_stack(stack_top); 604 stack_top = arch_align_stack(stack_top);
596 stack_top = PAGE_ALIGN(stack_top); 605 stack_top = PAGE_ALIGN(stack_top);
606
607 if (unlikely(stack_top < mmap_min_addr) ||
608 unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
609 return -ENOMEM;
610
597 stack_shift = vma->vm_end - stack_top; 611 stack_shift = vma->vm_end - stack_top;
598 612
599 bprm->p -= stack_shift; 613 bprm->p -= stack_shift;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6769fd0f35b8..f8cc34f542c3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync);
769 769
770static int __init fcntl_init(void) 770static int __init fcntl_init(void)
771{ 771{
772 /* please add new bits here to ensure allocation uniqueness */ 772 /*
773 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 773 * Please add new bits here to ensure allocation uniqueness.
774 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
775 * is defined as O_NONBLOCK on some platforms and not on others.
776 */
777 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
774 O_RDONLY | O_WRONLY | O_RDWR | 778 O_RDONLY | O_WRONLY | O_RDWR |
775 O_CREAT | O_EXCL | O_NOCTTY | 779 O_CREAT | O_EXCL | O_NOCTTY |
776 O_TRUNC | O_APPEND | O_NONBLOCK | 780 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
777 __O_SYNC | O_DSYNC | FASYNC | 781 __O_SYNC | O_DSYNC | FASYNC |
778 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 782 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
779 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 783 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7d9d06ba184b..81e086d8aa57 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -808,7 +808,7 @@ int bdi_writeback_thread(void *data)
808 wb->last_active = jiffies; 808 wb->last_active = jiffies;
809 809
810 set_current_state(TASK_INTERRUPTIBLE); 810 set_current_state(TASK_INTERRUPTIBLE);
811 if (!list_empty(&bdi->work_list)) { 811 if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
812 __set_current_state(TASK_RUNNING); 812 __set_current_state(TASK_RUNNING);
813 continue; 813 continue;
814 } 814 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 69ad053ffd78..d367af1514ef 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,7 +276,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
276 * Called with fc->lock, unlocks it 276 * Called with fc->lock, unlocks it
277 */ 277 */
278static void request_end(struct fuse_conn *fc, struct fuse_req *req) 278static void request_end(struct fuse_conn *fc, struct fuse_req *req)
279__releases(&fc->lock) 279__releases(fc->lock)
280{ 280{
281 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; 281 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
282 req->end = NULL; 282 req->end = NULL;
@@ -306,8 +306,8 @@ __releases(&fc->lock)
306 306
307static void wait_answer_interruptible(struct fuse_conn *fc, 307static void wait_answer_interruptible(struct fuse_conn *fc,
308 struct fuse_req *req) 308 struct fuse_req *req)
309__releases(&fc->lock) 309__releases(fc->lock)
310__acquires(&fc->lock) 310__acquires(fc->lock)
311{ 311{
312 if (signal_pending(current)) 312 if (signal_pending(current))
313 return; 313 return;
@@ -325,8 +325,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
325} 325}
326 326
327static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 327static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
328__releases(&fc->lock) 328__releases(fc->lock)
329__acquires(&fc->lock) 329__acquires(fc->lock)
330{ 330{
331 if (!fc->no_interrupt) { 331 if (!fc->no_interrupt) {
332 /* Any signal may interrupt this */ 332 /* Any signal may interrupt this */
@@ -905,8 +905,8 @@ static int request_pending(struct fuse_conn *fc)
905 905
906/* Wait until a request is available on the pending list */ 906/* Wait until a request is available on the pending list */
907static void request_wait(struct fuse_conn *fc) 907static void request_wait(struct fuse_conn *fc)
908__releases(&fc->lock) 908__releases(fc->lock)
909__acquires(&fc->lock) 909__acquires(fc->lock)
910{ 910{
911 DECLARE_WAITQUEUE(wait, current); 911 DECLARE_WAITQUEUE(wait, current);
912 912
@@ -934,7 +934,7 @@ __acquires(&fc->lock)
934 */ 934 */
935static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, 935static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
936 size_t nbytes, struct fuse_req *req) 936 size_t nbytes, struct fuse_req *req)
937__releases(&fc->lock) 937__releases(fc->lock)
938{ 938{
939 struct fuse_in_header ih; 939 struct fuse_in_header ih;
940 struct fuse_interrupt_in arg; 940 struct fuse_interrupt_in arg;
@@ -1720,8 +1720,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1720 * This function releases and reacquires fc->lock 1720 * This function releases and reacquires fc->lock
1721 */ 1721 */
1722static void end_requests(struct fuse_conn *fc, struct list_head *head) 1722static void end_requests(struct fuse_conn *fc, struct list_head *head)
1723__releases(&fc->lock) 1723__releases(fc->lock)
1724__acquires(&fc->lock) 1724__acquires(fc->lock)
1725{ 1725{
1726 while (!list_empty(head)) { 1726 while (!list_empty(head)) {
1727 struct fuse_req *req; 1727 struct fuse_req *req;
@@ -1744,8 +1744,8 @@ __acquires(&fc->lock)
1744 * locked). 1744 * locked).
1745 */ 1745 */
1746static void end_io_requests(struct fuse_conn *fc) 1746static void end_io_requests(struct fuse_conn *fc)
1747__releases(&fc->lock) 1747__releases(fc->lock)
1748__acquires(&fc->lock) 1748__acquires(fc->lock)
1749{ 1749{
1750 while (!list_empty(&fc->io)) { 1750 while (!list_empty(&fc->io)) {
1751 struct fuse_req *req = 1751 struct fuse_req *req =
@@ -1769,6 +1769,16 @@ __acquires(&fc->lock)
1769 } 1769 }
1770} 1770}
1771 1771
1772static void end_queued_requests(struct fuse_conn *fc)
1773__releases(fc->lock)
1774__acquires(fc->lock)
1775{
1776 fc->max_background = UINT_MAX;
1777 flush_bg_queue(fc);
1778 end_requests(fc, &fc->pending);
1779 end_requests(fc, &fc->processing);
1780}
1781
1772/* 1782/*
1773 * Abort all requests. 1783 * Abort all requests.
1774 * 1784 *
@@ -1795,8 +1805,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
1795 fc->connected = 0; 1805 fc->connected = 0;
1796 fc->blocked = 0; 1806 fc->blocked = 0;
1797 end_io_requests(fc); 1807 end_io_requests(fc);
1798 end_requests(fc, &fc->pending); 1808 end_queued_requests(fc);
1799 end_requests(fc, &fc->processing);
1800 wake_up_all(&fc->waitq); 1809 wake_up_all(&fc->waitq);
1801 wake_up_all(&fc->blocked_waitq); 1810 wake_up_all(&fc->blocked_waitq);
1802 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 1811 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1811,8 +1820,9 @@ int fuse_dev_release(struct inode *inode, struct file *file)
1811 if (fc) { 1820 if (fc) {
1812 spin_lock(&fc->lock); 1821 spin_lock(&fc->lock);
1813 fc->connected = 0; 1822 fc->connected = 0;
1814 end_requests(fc, &fc->pending); 1823 fc->blocked = 0;
1815 end_requests(fc, &fc->processing); 1824 end_queued_requests(fc);
1825 wake_up_all(&fc->blocked_waitq);
1816 spin_unlock(&fc->lock); 1826 spin_unlock(&fc->lock);
1817 fuse_conn_put(fc); 1827 fuse_conn_put(fc);
1818 } 1828 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 147c1f71bdb9..c8224587123f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1144,8 +1144,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1144 1144
1145/* Called under fc->lock, may release and reacquire it */ 1145/* Called under fc->lock, may release and reacquire it */
1146static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) 1146static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1147__releases(&fc->lock) 1147__releases(fc->lock)
1148__acquires(&fc->lock) 1148__acquires(fc->lock)
1149{ 1149{
1150 struct fuse_inode *fi = get_fuse_inode(req->inode); 1150 struct fuse_inode *fi = get_fuse_inode(req->inode);
1151 loff_t size = i_size_read(req->inode); 1151 loff_t size = i_size_read(req->inode);
@@ -1183,8 +1183,8 @@ __acquires(&fc->lock)
1183 * Called with fc->lock 1183 * Called with fc->lock
1184 */ 1184 */
1185void fuse_flush_writepages(struct inode *inode) 1185void fuse_flush_writepages(struct inode *inode)
1186__releases(&fc->lock) 1186__releases(fc->lock)
1187__acquires(&fc->lock) 1187__acquires(fc->lock)
1188{ 1188{
1189 struct fuse_conn *fc = get_fuse_conn(inode); 1189 struct fuse_conn *fc = get_fuse_conn(inode);
1190 struct fuse_inode *fi = get_fuse_inode(inode); 1190 struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index e20ee85955d1..f3f3578393a4 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
115 115
116 inode_inc_link_count(dir); 116 inode_inc_link_count(dir);
117 117
118 inode = minix_new_inode(dir, mode, &err); 118 inode = minix_new_inode(dir, S_IFDIR | mode, &err);
119 if (!inode) 119 if (!inode)
120 goto out_dir; 120 goto out_dir;
121 121
diff --git a/fs/namespace.c b/fs/namespace.c
index de402eb6eafb..a72eaabfe8f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1484,13 +1484,30 @@ out_unlock:
1484} 1484}
1485 1485
1486/* 1486/*
1487 * Sanity check the flags to change_mnt_propagation.
1488 */
1489
1490static int flags_to_propagation_type(int flags)
1491{
1492 int type = flags & ~MS_REC;
1493
1494 /* Fail if any non-propagation flags are set */
1495 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1496 return 0;
1497 /* Only one propagation flag should be set */
1498 if (!is_power_of_2(type))
1499 return 0;
1500 return type;
1501}
1502
1503/*
1487 * recursively change the type of the mountpoint. 1504 * recursively change the type of the mountpoint.
1488 */ 1505 */
1489static int do_change_type(struct path *path, int flag) 1506static int do_change_type(struct path *path, int flag)
1490{ 1507{
1491 struct vfsmount *m, *mnt = path->mnt; 1508 struct vfsmount *m, *mnt = path->mnt;
1492 int recurse = flag & MS_REC; 1509 int recurse = flag & MS_REC;
1493 int type = flag & ~MS_REC; 1510 int type;
1494 int err = 0; 1511 int err = 0;
1495 1512
1496 if (!capable(CAP_SYS_ADMIN)) 1513 if (!capable(CAP_SYS_ADMIN))
@@ -1499,6 +1516,10 @@ static int do_change_type(struct path *path, int flag)
1499 if (path->dentry != path->mnt->mnt_root) 1516 if (path->dentry != path->mnt->mnt_root)
1500 return -EINVAL; 1517 return -EINVAL;
1501 1518
1519 type = flags_to_propagation_type(flag);
1520 if (!type)
1521 return -EINVAL;
1522
1502 down_write(&namespace_sem); 1523 down_write(&namespace_sem);
1503 if (type == MS_SHARED) { 1524 if (type == MS_SHARED) {
1504 err = invent_group_ids(mnt, recurse); 1525 err = invent_group_ids(mnt, recurse);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2e7357104cfd..cf0d2ffb3c84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -440,7 +440,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
440 440
441static int nfs4_access_to_omode(u32 access) 441static int nfs4_access_to_omode(u32 access)
442{ 442{
443 switch (access) { 443 switch (access & NFS4_SHARE_ACCESS_BOTH) {
444 case NFS4_SHARE_ACCESS_READ: 444 case NFS4_SHARE_ACCESS_READ:
445 return O_RDONLY; 445 return O_RDONLY;
446 case NFS4_SHARE_ACCESS_WRITE: 446 case NFS4_SHARE_ACCESS_WRITE:
@@ -2450,14 +2450,13 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
2450static __be32 2450static __be32
2451nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) 2451nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
2452{ 2452{
2453 u32 op_share_access, new_access; 2453 u32 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK;
2454 bool new_access;
2454 __be32 status; 2455 __be32 status;
2455 2456
2456 set_access(&new_access, stp->st_access_bmap); 2457 new_access = !test_bit(op_share_access, &stp->st_access_bmap);
2457 new_access = (~new_access) & open->op_share_access & ~NFS4_SHARE_WANT_MASK;
2458
2459 if (new_access) { 2458 if (new_access) {
2460 status = nfs4_get_vfs_file(rqstp, fp, cur_fh, new_access); 2459 status = nfs4_get_vfs_file(rqstp, fp, cur_fh, op_share_access);
2461 if (status) 2460 if (status)
2462 return status; 2461 return status;
2463 } 2462 }
@@ -2470,7 +2469,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
2470 return status; 2469 return status;
2471 } 2470 }
2472 /* remember the open */ 2471 /* remember the open */
2473 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK;
2474 __set_bit(op_share_access, &stp->st_access_bmap); 2472 __set_bit(op_share_access, &stp->st_access_bmap);
2475 __set_bit(open->op_share_deny, &stp->st_deny_bmap); 2473 __set_bit(open->op_share_deny, &stp->st_deny_bmap);
2476 2474
@@ -2983,7 +2981,6 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2983 *filpp = find_readable_file(stp->st_file); 2981 *filpp = find_readable_file(stp->st_file);
2984 else 2982 else
2985 *filpp = find_writeable_file(stp->st_file); 2983 *filpp = find_writeable_file(stp->st_file);
2986 BUG_ON(!*filpp); /* assured by check_openmode */
2987 } 2984 }
2988 } 2985 }
2989 status = nfs_ok; 2986 status = nfs_ok;
@@ -3561,7 +3558,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3561 struct nfs4_stateowner *open_sop = NULL; 3558 struct nfs4_stateowner *open_sop = NULL;
3562 struct nfs4_stateowner *lock_sop = NULL; 3559 struct nfs4_stateowner *lock_sop = NULL;
3563 struct nfs4_stateid *lock_stp; 3560 struct nfs4_stateid *lock_stp;
3564 struct file *filp; 3561 struct nfs4_file *fp;
3562 struct file *filp = NULL;
3565 struct file_lock file_lock; 3563 struct file_lock file_lock;
3566 struct file_lock conflock; 3564 struct file_lock conflock;
3567 __be32 status = 0; 3565 __be32 status = 0;
@@ -3591,7 +3589,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3591 * lock stateid. 3589 * lock stateid.
3592 */ 3590 */
3593 struct nfs4_stateid *open_stp = NULL; 3591 struct nfs4_stateid *open_stp = NULL;
3594 struct nfs4_file *fp;
3595 3592
3596 status = nfserr_stale_clientid; 3593 status = nfserr_stale_clientid;
3597 if (!nfsd4_has_session(cstate) && 3594 if (!nfsd4_has_session(cstate) &&
@@ -3634,6 +3631,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3634 if (status) 3631 if (status)
3635 goto out; 3632 goto out;
3636 lock_sop = lock->lk_replay_owner; 3633 lock_sop = lock->lk_replay_owner;
3634 fp = lock_stp->st_file;
3637 } 3635 }
3638 /* lock->lk_replay_owner and lock_stp have been created or found */ 3636 /* lock->lk_replay_owner and lock_stp have been created or found */
3639 3637
@@ -3648,13 +3646,19 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3648 switch (lock->lk_type) { 3646 switch (lock->lk_type) {
3649 case NFS4_READ_LT: 3647 case NFS4_READ_LT:
3650 case NFS4_READW_LT: 3648 case NFS4_READW_LT:
3651 filp = find_readable_file(lock_stp->st_file); 3649 if (find_readable_file(lock_stp->st_file)) {
3650 nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_READ);
3651 filp = find_readable_file(lock_stp->st_file);
3652 }
3652 file_lock.fl_type = F_RDLCK; 3653 file_lock.fl_type = F_RDLCK;
3653 cmd = F_SETLK; 3654 cmd = F_SETLK;
3654 break; 3655 break;
3655 case NFS4_WRITE_LT: 3656 case NFS4_WRITE_LT:
3656 case NFS4_WRITEW_LT: 3657 case NFS4_WRITEW_LT:
3657 filp = find_writeable_file(lock_stp->st_file); 3658 if (find_writeable_file(lock_stp->st_file)) {
3659 nfs4_get_vfs_file(rqstp, fp, &cstate->current_fh, NFS4_SHARE_ACCESS_WRITE);
3660 filp = find_writeable_file(lock_stp->st_file);
3661 }
3658 file_lock.fl_type = F_WRLCK; 3662 file_lock.fl_type = F_WRLCK;
3659 cmd = F_SETLK; 3663 cmd = F_SETLK;
3660 break; 3664 break;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 7731a75971dd..322518c88e4b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -363,23 +363,23 @@ struct nfs4_file {
363 * at all? */ 363 * at all? */
364static inline struct file *find_writeable_file(struct nfs4_file *f) 364static inline struct file *find_writeable_file(struct nfs4_file *f)
365{ 365{
366 if (f->fi_fds[O_RDWR]) 366 if (f->fi_fds[O_WRONLY])
367 return f->fi_fds[O_RDWR]; 367 return f->fi_fds[O_WRONLY];
368 return f->fi_fds[O_WRONLY]; 368 return f->fi_fds[O_RDWR];
369} 369}
370 370
371static inline struct file *find_readable_file(struct nfs4_file *f) 371static inline struct file *find_readable_file(struct nfs4_file *f)
372{ 372{
373 if (f->fi_fds[O_RDWR]) 373 if (f->fi_fds[O_RDONLY])
374 return f->fi_fds[O_RDWR]; 374 return f->fi_fds[O_RDONLY];
375 return f->fi_fds[O_RDONLY]; 375 return f->fi_fds[O_RDWR];
376} 376}
377 377
378static inline struct file *find_any_file(struct nfs4_file *f) 378static inline struct file *find_any_file(struct nfs4_file *f)
379{ 379{
380 if (f->fi_fds[O_RDWR]) 380 if (f->fi_fds[O_RDWR])
381 return f->fi_fds[O_RDWR]; 381 return f->fi_fds[O_RDWR];
382 else if (f->fi_fds[O_RDWR]) 382 else if (f->fi_fds[O_WRONLY])
383 return f->fi_fds[O_WRONLY]; 383 return f->fi_fds[O_WRONLY];
384 else 384 else
385 return f->fi_fds[O_RDONLY]; 385 return f->fi_fds[O_RDONLY];
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 96360a83cb91..661a6cf8e826 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2033,15 +2033,17 @@ out:
2033__be32 2033__be32
2034nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) 2034nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
2035{ 2035{
2036 struct path path = {
2037 .mnt = fhp->fh_export->ex_path.mnt,
2038 .dentry = fhp->fh_dentry,
2039 };
2040 __be32 err; 2036 __be32 err;
2041 2037
2042 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); 2038 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
2043 if (!err && vfs_statfs(&path, stat)) 2039 if (!err) {
2044 err = nfserr_io; 2040 struct path path = {
2041 .mnt = fhp->fh_export->ex_path.mnt,
2042 .dentry = fhp->fh_dentry,
2043 };
2044 if (vfs_statfs(&path, stat))
2045 err = nfserr_io;
2046 }
2045 return err; 2047 return err;
2046} 2048}
2047 2049
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 4317f177ea7c..ba7c10c917fc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -446,6 +446,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
446 nilfs_mdt_destroy(nilfs->ns_cpfile); 446 nilfs_mdt_destroy(nilfs->ns_cpfile);
447 nilfs_mdt_destroy(nilfs->ns_sufile); 447 nilfs_mdt_destroy(nilfs->ns_sufile);
448 nilfs_mdt_destroy(nilfs->ns_dat); 448 nilfs_mdt_destroy(nilfs->ns_dat);
449 nilfs_mdt_destroy(nilfs->ns_gc_dat);
449 450
450 failed: 451 failed:
451 nilfs_clear_recovery_info(&ri); 452 nilfs_clear_recovery_info(&ri);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 756566fe8449..85366c78cc37 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -165,9 +165,6 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
165 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, 165 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
166 inode_mark, vfsmnt_mark, event_mask, data, data_type); 166 inode_mark, vfsmnt_mark, event_mask, data, data_type);
167 167
168 pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n",
169 __func__, group, vfsmnt_mark, inode_mark, event_mask);
170
171 /* sorry, fanotify only gives a damn about files and dirs */ 168 /* sorry, fanotify only gives a damn about files and dirs */
172 if (!S_ISREG(to_tell->i_mode) && 169 if (!S_ISREG(to_tell->i_mode) &&
173 !S_ISDIR(to_tell->i_mode)) 170 !S_ISDIR(to_tell->i_mode))
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 032b837fcd11..5ed8e58d7bfc 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group,
195 re->fd = fd; 195 re->fd = fd;
196 196
197 mutex_lock(&group->fanotify_data.access_mutex); 197 mutex_lock(&group->fanotify_data.access_mutex);
198
199 if (group->fanotify_data.bypass_perm) {
200 mutex_unlock(&group->fanotify_data.access_mutex);
201 kmem_cache_free(fanotify_response_event_cache, re);
202 event->response = FAN_ALLOW;
203 return 0;
204 }
205
198 list_add_tail(&re->list, &group->fanotify_data.access_list); 206 list_add_tail(&re->list, &group->fanotify_data.access_list);
199 mutex_unlock(&group->fanotify_data.access_mutex); 207 mutex_unlock(&group->fanotify_data.access_mutex);
200 208
@@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
364static int fanotify_release(struct inode *ignored, struct file *file) 372static int fanotify_release(struct inode *ignored, struct file *file)
365{ 373{
366 struct fsnotify_group *group = file->private_data; 374 struct fsnotify_group *group = file->private_data;
375 struct fanotify_response_event *re, *lre;
367 376
368 pr_debug("%s: file=%p group=%p\n", __func__, file, group); 377 pr_debug("%s: file=%p group=%p\n", __func__, file, group);
369 378
379#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
380 mutex_lock(&group->fanotify_data.access_mutex);
381
382 group->fanotify_data.bypass_perm = true;
383
384 list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
385 pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
386 re, re->event);
387
388 list_del_init(&re->list);
389 re->event->response = FAN_ALLOW;
390
391 kmem_cache_free(fanotify_response_event_cache, re);
392 }
393 mutex_unlock(&group->fanotify_data.access_mutex);
394
395 wake_up(&group->fanotify_data.access_waitq);
396#endif
370 /* matches the fanotify_init->fsnotify_alloc_group */ 397 /* matches the fanotify_init->fsnotify_alloc_group */
371 fsnotify_put_group(group); 398 fsnotify_put_group(group);
372 399
@@ -614,7 +641,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
614 __func__, flags, event_f_flags); 641 __func__, flags, event_f_flags);
615 642
616 if (!capable(CAP_SYS_ADMIN)) 643 if (!capable(CAP_SYS_ADMIN))
617 return -EACCES; 644 return -EPERM;
618 645
619 if (flags & ~FAN_ALL_INIT_FLAGS) 646 if (flags & ~FAN_ALL_INIT_FLAGS)
620 return -EINVAL; 647 return -EINVAL;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 3970392b2722..36802420d69a 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -148,13 +148,14 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
148 const unsigned char *file_name, 148 const unsigned char *file_name,
149 struct fsnotify_event **event) 149 struct fsnotify_event **event)
150{ 150{
151 struct fsnotify_group *group = inode_mark->group; 151 struct fsnotify_group *group = NULL;
152 __u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); 152 __u32 inode_test_mask = 0;
153 __u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); 153 __u32 vfsmount_test_mask = 0;
154 154
155 pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p" 155 if (unlikely(!inode_mark && !vfsmount_mark)) {
156 " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell, 156 BUG();
157 mnt, inode_mark, mask, data, data_is, cookie, *event); 157 return 0;
158 }
158 159
159 /* clear ignored on inode modification */ 160 /* clear ignored on inode modification */
160 if (mask & FS_MODIFY) { 161 if (mask & FS_MODIFY) {
@@ -168,18 +169,29 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
168 169
169 /* does the inode mark tell us to do something? */ 170 /* does the inode mark tell us to do something? */
170 if (inode_mark) { 171 if (inode_mark) {
172 group = inode_mark->group;
173 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD);
171 inode_test_mask &= inode_mark->mask; 174 inode_test_mask &= inode_mark->mask;
172 inode_test_mask &= ~inode_mark->ignored_mask; 175 inode_test_mask &= ~inode_mark->ignored_mask;
173 } 176 }
174 177
175 /* does the vfsmount_mark tell us to do something? */ 178 /* does the vfsmount_mark tell us to do something? */
176 if (vfsmount_mark) { 179 if (vfsmount_mark) {
180 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD);
181 group = vfsmount_mark->group;
177 vfsmount_test_mask &= vfsmount_mark->mask; 182 vfsmount_test_mask &= vfsmount_mark->mask;
178 vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; 183 vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
179 if (inode_mark) 184 if (inode_mark)
180 vfsmount_test_mask &= ~inode_mark->ignored_mask; 185 vfsmount_test_mask &= ~inode_mark->ignored_mask;
181 } 186 }
182 187
188 pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p"
189 " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
190 " data=%p data_is=%d cookie=%d event=%p\n",
191 __func__, group, to_tell, mnt, mask, inode_mark,
192 inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
193 data_is, cookie, *event);
194
183 if (!inode_test_mask && !vfsmount_test_mask) 195 if (!inode_test_mask && !vfsmount_test_mask)
184 return 0; 196 return 0;
185 197
@@ -207,13 +219,12 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
207int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, 219int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
208 const unsigned char *file_name, u32 cookie) 220 const unsigned char *file_name, u32 cookie)
209{ 221{
210 struct hlist_node *inode_node, *vfsmount_node; 222 struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
211 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; 223 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
212 struct fsnotify_group *inode_group, *vfsmount_group; 224 struct fsnotify_group *inode_group, *vfsmount_group;
213 struct fsnotify_event *event = NULL; 225 struct fsnotify_event *event = NULL;
214 struct vfsmount *mnt; 226 struct vfsmount *mnt;
215 int idx, ret = 0; 227 int idx, ret = 0;
216 bool used_inode = false, used_vfsmount = false;
217 /* global tests shouldn't care about events on child only the specific event */ 228 /* global tests shouldn't care about events on child only the specific event */
218 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); 229 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
219 230
@@ -238,57 +249,50 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
238 (test_mask & to_tell->i_fsnotify_mask)) 249 (test_mask & to_tell->i_fsnotify_mask))
239 inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, 250 inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
240 &fsnotify_mark_srcu); 251 &fsnotify_mark_srcu);
241 else
242 inode_node = NULL;
243 252
244 if (mnt) { 253 if (mnt && ((mask & FS_MODIFY) ||
245 if ((mask & FS_MODIFY) || 254 (test_mask & mnt->mnt_fsnotify_mask))) {
246 (test_mask & mnt->mnt_fsnotify_mask)) 255 vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
247 vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, 256 &fsnotify_mark_srcu);
248 &fsnotify_mark_srcu); 257 inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
249 else 258 &fsnotify_mark_srcu);
250 vfsmount_node = NULL;
251 } else {
252 mnt = NULL;
253 vfsmount_node = NULL;
254 } 259 }
255 260
256 while (inode_node || vfsmount_node) { 261 while (inode_node || vfsmount_node) {
262 inode_group = vfsmount_group = NULL;
263
257 if (inode_node) { 264 if (inode_node) {
258 inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), 265 inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
259 struct fsnotify_mark, i.i_list); 266 struct fsnotify_mark, i.i_list);
260 inode_group = inode_mark->group; 267 inode_group = inode_mark->group;
261 } else 268 }
262 inode_group = (void *)-1;
263 269
264 if (vfsmount_node) { 270 if (vfsmount_node) {
265 vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), 271 vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
266 struct fsnotify_mark, m.m_list); 272 struct fsnotify_mark, m.m_list);
267 vfsmount_group = vfsmount_mark->group; 273 vfsmount_group = vfsmount_mark->group;
268 } else 274 }
269 vfsmount_group = (void *)-1;
270 275
271 if (inode_group < vfsmount_group) { 276 if (inode_group > vfsmount_group) {
272 /* handle inode */ 277 /* handle inode */
273 send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, 278 send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
274 data_is, cookie, file_name, &event); 279 data_is, cookie, file_name, &event);
275 used_inode = true; 280 /* we didn't use the vfsmount_mark */
276 } else if (vfsmount_group < inode_group) { 281 vfsmount_group = NULL;
282 } else if (vfsmount_group > inode_group) {
277 send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, 283 send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
278 data_is, cookie, file_name, &event); 284 data_is, cookie, file_name, &event);
279 used_vfsmount = true; 285 inode_group = NULL;
280 } else { 286 } else {
281 send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, 287 send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
282 mask, data, data_is, cookie, file_name, 288 mask, data, data_is, cookie, file_name,
283 &event); 289 &event);
284 used_vfsmount = true;
285 used_inode = true;
286 } 290 }
287 291
288 if (used_inode) 292 if (inode_group)
289 inode_node = srcu_dereference(inode_node->next, 293 inode_node = srcu_dereference(inode_node->next,
290 &fsnotify_mark_srcu); 294 &fsnotify_mark_srcu);
291 if (used_vfsmount) 295 if (vfsmount_group)
292 vfsmount_node = srcu_dereference(vfsmount_node->next, 296 vfsmount_node = srcu_dereference(vfsmount_node->next,
293 &fsnotify_mark_srcu); 297 &fsnotify_mark_srcu);
294 } 298 }
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 215e12ce1d85..592fae5007d1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
6672 last_page_bytes = PAGE_ALIGN(end); 6672 last_page_bytes = PAGE_ALIGN(end);
6673 index = start >> PAGE_CACHE_SHIFT; 6673 index = start >> PAGE_CACHE_SHIFT;
6674 do { 6674 do {
6675 pages[numpages] = grab_cache_page(mapping, index); 6675 pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
6676 if (!pages[numpages]) { 6676 if (!pages[numpages]) {
6677 ret = -ENOMEM; 6677 ret = -ENOMEM;
6678 mlog_errno(ret); 6678 mlog_errno(ret);
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index ec6d12339593..c7ee03c22226 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
439 439
440 ocfs2_blockcheck_inc_failure(stats); 440 ocfs2_blockcheck_inc_failure(stats);
441 mlog(ML_ERROR, 441 mlog(ML_ERROR,
442 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", 442 "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n",
443 (unsigned int)check.bc_crc32e, (unsigned int)crc); 443 (unsigned int)check.bc_crc32e, (unsigned int)crc);
444 444
445 /* Ok, try ECC fixups */ 445 /* Ok, try ECC fixups */
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
453 goto out; 453 goto out;
454 } 454 }
455 455
456 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", 456 mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n",
457 (unsigned int)check.bc_crc32e, (unsigned int)crc); 457 (unsigned int)check.bc_crc32e, (unsigned int)crc);
458 458
459 rc = -EIO; 459 rc = -EIO;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 81296b4e3646..9a03c151b5ce 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -36,6 +36,7 @@
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37#include <linux/falloc.h> 37#include <linux/falloc.h>
38#include <linux/quotaops.h> 38#include <linux/quotaops.h>
39#include <linux/blkdev.h>
39 40
40#define MLOG_MASK_PREFIX ML_INODE 41#define MLOG_MASK_PREFIX ML_INODE
41#include <cluster/masklog.h> 42#include <cluster/masklog.h>
@@ -190,8 +191,16 @@ static int ocfs2_sync_file(struct file *file, int datasync)
190 if (err) 191 if (err)
191 goto bail; 192 goto bail;
192 193
193 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 194 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
195 /*
196 * We still have to flush drive's caches to get data to the
197 * platter
198 */
199 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
200 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
201 NULL, BLKDEV_IFL_WAIT);
194 goto bail; 202 goto bail;
203 }
195 204
196 journal = osb->journal->j_journal; 205 journal = osb->journal->j_journal;
197 err = jbd2_journal_force_commit(journal); 206 err = jbd2_journal_force_commit(journal);
@@ -774,7 +783,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); 783 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
775 BUG_ON(abs_from & (inode->i_blkbits - 1)); 784 BUG_ON(abs_from & (inode->i_blkbits - 1));
776 785
777 page = grab_cache_page(mapping, index); 786 page = find_or_create_page(mapping, index, GFP_NOFS);
778 if (!page) { 787 if (!page) {
779 ret = -ENOMEM; 788 ret = -ENOMEM;
780 mlog_errno(ret); 789 mlog_errno(ret);
@@ -2329,7 +2338,7 @@ out_dio:
2329 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2338 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2330 2339
2331 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || 2340 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
2332 ((file->f_flags & O_DIRECT) && has_refcount)) { 2341 ((file->f_flags & O_DIRECT) && !direct_io)) {
2333 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2342 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2334 pos + count - 1); 2343 pos + count - 1);
2335 if (ret < 0) 2344 if (ret < 0)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0492464916b1..eece3e05d9d0 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
488 OCFS2_BH_IGNORE_CACHE); 488 OCFS2_BH_IGNORE_CACHE);
489 } else { 489 } else {
490 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); 490 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
491 if (!status) 491 /*
492 * If buffer is in jbd, then its checksum may not have been
493 * computed as yet.
494 */
495 if (!status && !buffer_jbd(bh))
492 status = ocfs2_validate_inode_block(osb->sb, bh); 496 status = ocfs2_validate_inode_block(osb->sb, bh);
493 } 497 }
494 if (status < 0) { 498 if (status < 0) {
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index af2b8fe1f139..4c18f4ad93b4 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -74,9 +74,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
74 /* 74 /*
75 * Another node might have truncated while we were waiting on 75 * Another node might have truncated while we were waiting on
76 * cluster locks. 76 * cluster locks.
77 * We don't check size == 0 before the shift. This is borrowed
78 * from do_generic_file_read.
77 */ 79 */
78 last_index = size >> PAGE_CACHE_SHIFT; 80 last_index = (size - 1) >> PAGE_CACHE_SHIFT;
79 if (page->index > last_index) { 81 if (unlikely(!size || page->index > last_index)) {
80 ret = -EINVAL; 82 ret = -EINVAL;
81 goto out; 83 goto out;
82 } 84 }
@@ -107,7 +109,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
107 * because the "write" would invalidate their data. 109 * because the "write" would invalidate their data.
108 */ 110 */
109 if (page->index == last_index) 111 if (page->index == last_index)
110 len = size & ~PAGE_CACHE_MASK; 112 len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
111 113
112 ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, 114 ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
113 &fsdata, di_bh, page); 115 &fsdata, di_bh, page);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f171b51a74f7..a00dda2e4f16 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -472,32 +472,23 @@ leave:
472 return status; 472 return status;
473} 473}
474 474
475static int ocfs2_mknod_locked(struct ocfs2_super *osb, 475static int __ocfs2_mknod_locked(struct inode *dir,
476 struct inode *dir, 476 struct inode *inode,
477 struct inode *inode, 477 dev_t dev,
478 dev_t dev, 478 struct buffer_head **new_fe_bh,
479 struct buffer_head **new_fe_bh, 479 struct buffer_head *parent_fe_bh,
480 struct buffer_head *parent_fe_bh, 480 handle_t *handle,
481 handle_t *handle, 481 struct ocfs2_alloc_context *inode_ac,
482 struct ocfs2_alloc_context *inode_ac) 482 u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit)
483{ 483{
484 int status = 0; 484 int status = 0;
485 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
485 struct ocfs2_dinode *fe = NULL; 486 struct ocfs2_dinode *fe = NULL;
486 struct ocfs2_extent_list *fel; 487 struct ocfs2_extent_list *fel;
487 u64 suballoc_loc, fe_blkno = 0;
488 u16 suballoc_bit;
489 u16 feat; 488 u16 feat;
490 489
491 *new_fe_bh = NULL; 490 *new_fe_bh = NULL;
492 491
493 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
494 inode_ac, &suballoc_loc,
495 &suballoc_bit, &fe_blkno);
496 if (status < 0) {
497 mlog_errno(status);
498 goto leave;
499 }
500
501 /* populate as many fields early on as possible - many of 492 /* populate as many fields early on as possible - many of
502 * these are used by the support functions here and in 493 * these are used by the support functions here and in
503 * callers. */ 494 * callers. */
@@ -591,6 +582,34 @@ leave:
591 return status; 582 return status;
592} 583}
593 584
585static int ocfs2_mknod_locked(struct ocfs2_super *osb,
586 struct inode *dir,
587 struct inode *inode,
588 dev_t dev,
589 struct buffer_head **new_fe_bh,
590 struct buffer_head *parent_fe_bh,
591 handle_t *handle,
592 struct ocfs2_alloc_context *inode_ac)
593{
594 int status = 0;
595 u64 suballoc_loc, fe_blkno = 0;
596 u16 suballoc_bit;
597
598 *new_fe_bh = NULL;
599
600 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
601 inode_ac, &suballoc_loc,
602 &suballoc_bit, &fe_blkno);
603 if (status < 0) {
604 mlog_errno(status);
605 return status;
606 }
607
608 return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
609 parent_fe_bh, handle, inode_ac,
610 fe_blkno, suballoc_loc, suballoc_bit);
611}
612
594static int ocfs2_mkdir(struct inode *dir, 613static int ocfs2_mkdir(struct inode *dir,
595 struct dentry *dentry, 614 struct dentry *dentry,
596 int mode) 615 int mode)
@@ -1852,61 +1871,117 @@ bail:
1852 return status; 1871 return status;
1853} 1872}
1854 1873
1855static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 1874static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
1856 struct inode **ret_orphan_dir, 1875 struct inode **ret_orphan_dir,
1857 u64 blkno, 1876 struct buffer_head **ret_orphan_dir_bh)
1858 char *name,
1859 struct ocfs2_dir_lookup_result *lookup)
1860{ 1877{
1861 struct inode *orphan_dir_inode; 1878 struct inode *orphan_dir_inode;
1862 struct buffer_head *orphan_dir_bh = NULL; 1879 struct buffer_head *orphan_dir_bh = NULL;
1863 int status = 0; 1880 int ret = 0;
1864
1865 status = ocfs2_blkno_stringify(blkno, name);
1866 if (status < 0) {
1867 mlog_errno(status);
1868 return status;
1869 }
1870 1881
1871 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 1882 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
1872 ORPHAN_DIR_SYSTEM_INODE, 1883 ORPHAN_DIR_SYSTEM_INODE,
1873 osb->slot_num); 1884 osb->slot_num);
1874 if (!orphan_dir_inode) { 1885 if (!orphan_dir_inode) {
1875 status = -ENOENT; 1886 ret = -ENOENT;
1876 mlog_errno(status); 1887 mlog_errno(ret);
1877 return status; 1888 return ret;
1878 } 1889 }
1879 1890
1880 mutex_lock(&orphan_dir_inode->i_mutex); 1891 mutex_lock(&orphan_dir_inode->i_mutex);
1881 1892
1882 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); 1893 ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
1883 if (status < 0) { 1894 if (ret < 0) {
1884 mlog_errno(status); 1895 mutex_unlock(&orphan_dir_inode->i_mutex);
1885 goto leave; 1896 iput(orphan_dir_inode);
1897
1898 mlog_errno(ret);
1899 return ret;
1886 } 1900 }
1887 1901
1888 status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, 1902 *ret_orphan_dir = orphan_dir_inode;
1889 orphan_dir_bh, name, 1903 *ret_orphan_dir_bh = orphan_dir_bh;
1890 OCFS2_ORPHAN_NAMELEN, lookup);
1891 if (status < 0) {
1892 ocfs2_inode_unlock(orphan_dir_inode, 1);
1893 1904
1894 mlog_errno(status); 1905 return 0;
1895 goto leave; 1906}
1907
1908static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
1909 struct buffer_head *orphan_dir_bh,
1910 u64 blkno,
1911 char *name,
1912 struct ocfs2_dir_lookup_result *lookup)
1913{
1914 int ret;
1915 struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
1916
1917 ret = ocfs2_blkno_stringify(blkno, name);
1918 if (ret < 0) {
1919 mlog_errno(ret);
1920 return ret;
1921 }
1922
1923 ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
1924 orphan_dir_bh, name,
1925 OCFS2_ORPHAN_NAMELEN, lookup);
1926 if (ret < 0) {
1927 mlog_errno(ret);
1928 return ret;
1929 }
1930
1931 return 0;
1932}
1933
1934/**
1935 * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for
1936 * insertion of an orphan.
1937 * @osb: ocfs2 file system
1938 * @ret_orphan_dir: Orphan dir inode - returned locked!
1939 * @blkno: Actual block number of the inode to be inserted into orphan dir.
1940 * @lookup: dir lookup result, to be passed back into functions like
1941 * ocfs2_orphan_add
1942 *
1943 * Returns zero on success and the ret_orphan_dir, name and lookup
1944 * fields will be populated.
1945 *
1946 * Returns non-zero on failure.
1947 */
1948static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1949 struct inode **ret_orphan_dir,
1950 u64 blkno,
1951 char *name,
1952 struct ocfs2_dir_lookup_result *lookup)
1953{
1954 struct inode *orphan_dir_inode = NULL;
1955 struct buffer_head *orphan_dir_bh = NULL;
1956 int ret = 0;
1957
1958 ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode,
1959 &orphan_dir_bh);
1960 if (ret < 0) {
1961 mlog_errno(ret);
1962 return ret;
1963 }
1964
1965 ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
1966 blkno, name, lookup);
1967 if (ret < 0) {
1968 mlog_errno(ret);
1969 goto out;
1896 } 1970 }
1897 1971
1898 *ret_orphan_dir = orphan_dir_inode; 1972 *ret_orphan_dir = orphan_dir_inode;
1899 1973
1900leave: 1974out:
1901 if (status) { 1975 brelse(orphan_dir_bh);
1976
1977 if (ret) {
1978 ocfs2_inode_unlock(orphan_dir_inode, 1);
1902 mutex_unlock(&orphan_dir_inode->i_mutex); 1979 mutex_unlock(&orphan_dir_inode->i_mutex);
1903 iput(orphan_dir_inode); 1980 iput(orphan_dir_inode);
1904 } 1981 }
1905 1982
1906 brelse(orphan_dir_bh); 1983 mlog_exit(ret);
1907 1984 return ret;
1908 mlog_exit(status);
1909 return status;
1910} 1985}
1911 1986
1912static int ocfs2_orphan_add(struct ocfs2_super *osb, 1987static int ocfs2_orphan_add(struct ocfs2_super *osb,
@@ -2053,6 +2128,99 @@ leave:
2053 return status; 2128 return status;
2054} 2129}
2055 2130
2131/**
2132 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly
2133 * allocated file. This is different from the typical 'add to orphan dir'
2134 * operation in that the inode does not yet exist. This is a problem because
2135 * the orphan dir stringifies the inode block number to come up with its
2136 * dirent. Obviously if the inode does not yet exist we have a chicken and egg
2137 * problem. This function works around it by calling deeper into the orphan
2138 * and suballoc code than other callers. Use this only by necessity.
2139 * @dir: The directory which this inode will ultimately wind up under - not the
2140 * orphan dir!
2141 * @dir_bh: buffer_head the @dir inode block
2142 * @orphan_name: string of length (OCFS2_ORPHAN_NAMELEN + 1). Will be filled
2143 * with the string to be used for orphan dirent. Pass back to the orphan dir
2144 * code.
2145 * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan
2146 * dir code.
2147 * @ret_di_blkno: block number where the new inode will be allocated.
2148 * @orphan_insert: Dir insert context to be passed back into orphan dir code.
2149 * @ret_inode_ac: Inode alloc context to be passed back to the allocator.
2150 *
2151 * Returns zero on success and the ret_orphan_dir, name and lookup
2152 * fields will be populated.
2153 *
2154 * Returns non-zero on failure.
2155 */
2156static int ocfs2_prep_new_orphaned_file(struct inode *dir,
2157 struct buffer_head *dir_bh,
2158 char *orphan_name,
2159 struct inode **ret_orphan_dir,
2160 u64 *ret_di_blkno,
2161 struct ocfs2_dir_lookup_result *orphan_insert,
2162 struct ocfs2_alloc_context **ret_inode_ac)
2163{
2164 int ret;
2165 u64 di_blkno;
2166 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2167 struct inode *orphan_dir = NULL;
2168 struct buffer_head *orphan_dir_bh = NULL;
2169 struct ocfs2_alloc_context *inode_ac = NULL;
2170
2171 ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh);
2172 if (ret < 0) {
2173 mlog_errno(ret);
2174 return ret;
2175 }
2176
2177 /* reserve an inode spot */
2178 ret = ocfs2_reserve_new_inode(osb, &inode_ac);
2179 if (ret < 0) {
2180 if (ret != -ENOSPC)
2181 mlog_errno(ret);
2182 goto out;
2183 }
2184
2185 ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac,
2186 &di_blkno);
2187 if (ret) {
2188 mlog_errno(ret);
2189 goto out;
2190 }
2191
2192 ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
2193 di_blkno, orphan_name, orphan_insert);
2194 if (ret < 0) {
2195 mlog_errno(ret);
2196 goto out;
2197 }
2198
2199out:
2200 if (ret == 0) {
2201 *ret_orphan_dir = orphan_dir;
2202 *ret_di_blkno = di_blkno;
2203 *ret_inode_ac = inode_ac;
2204 /*
2205 * orphan_name and orphan_insert are already up to
2206 * date via prepare_orphan_dir
2207 */
2208 } else {
2209 /* Unroll reserve_new_inode* */
2210 if (inode_ac)
2211 ocfs2_free_alloc_context(inode_ac);
2212
2213 /* Unroll orphan dir locking */
2214 mutex_unlock(&orphan_dir->i_mutex);
2215 ocfs2_inode_unlock(orphan_dir, 1);
2216 iput(orphan_dir);
2217 }
2218
2219 brelse(orphan_dir_bh);
2220
2221 return 0;
2222}
2223
2056int ocfs2_create_inode_in_orphan(struct inode *dir, 2224int ocfs2_create_inode_in_orphan(struct inode *dir,
2057 int mode, 2225 int mode,
2058 struct inode **new_inode) 2226 struct inode **new_inode)
@@ -2068,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2068 struct buffer_head *new_di_bh = NULL; 2236 struct buffer_head *new_di_bh = NULL;
2069 struct ocfs2_alloc_context *inode_ac = NULL; 2237 struct ocfs2_alloc_context *inode_ac = NULL;
2070 struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; 2238 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
2239 u64 uninitialized_var(di_blkno), suballoc_loc;
2240 u16 suballoc_bit;
2071 2241
2072 status = ocfs2_inode_lock(dir, &parent_di_bh, 1); 2242 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2073 if (status < 0) { 2243 if (status < 0) {
@@ -2076,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2076 return status; 2246 return status;
2077 } 2247 }
2078 2248
2079 /* 2249 status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh,
2080 * We give the orphan dir the root blkno to fake an orphan name, 2250 orphan_name, &orphan_dir,
2081 * and allocate enough space for our insertion. 2251 &di_blkno, &orphan_insert, &inode_ac);
2082 */
2083 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
2084 osb->root_blkno,
2085 orphan_name, &orphan_insert);
2086 if (status < 0) {
2087 mlog_errno(status);
2088 goto leave;
2089 }
2090
2091 /* reserve an inode spot */
2092 status = ocfs2_reserve_new_inode(osb, &inode_ac);
2093 if (status < 0) { 2252 if (status < 0) {
2094 if (status != -ENOSPC) 2253 if (status != -ENOSPC)
2095 mlog_errno(status); 2254 mlog_errno(status);
@@ -2116,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2116 goto leave; 2275 goto leave;
2117 did_quota_inode = 1; 2276 did_quota_inode = 1;
2118 2277
2119 inode->i_nlink = 0; 2278 status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac,
2120 /* do the real work now. */ 2279 &suballoc_loc,
2121 status = ocfs2_mknod_locked(osb, dir, inode, 2280 &suballoc_bit, di_blkno);
2122 0, &new_di_bh, parent_di_bh, handle,
2123 inode_ac);
2124 if (status < 0) { 2281 if (status < 0) {
2125 mlog_errno(status); 2282 mlog_errno(status);
2126 goto leave; 2283 goto leave;
2127 } 2284 }
2128 2285
2129 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); 2286 inode->i_nlink = 0;
2287 /* do the real work now. */
2288 status = __ocfs2_mknod_locked(dir, inode,
2289 0, &new_di_bh, parent_di_bh, handle,
2290 inode_ac, di_blkno, suballoc_loc,
2291 suballoc_bit);
2130 if (status < 0) { 2292 if (status < 0) {
2131 mlog_errno(status); 2293 mlog_errno(status);
2132 goto leave; 2294 goto leave;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 73a11ccfd4c2..0afeda83120f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2960,7 +2960,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2960 if (map_end & (PAGE_CACHE_SIZE - 1)) 2960 if (map_end & (PAGE_CACHE_SIZE - 1))
2961 to = map_end & (PAGE_CACHE_SIZE - 1); 2961 to = map_end & (PAGE_CACHE_SIZE - 1);
2962 2962
2963 page = grab_cache_page(mapping, page_index); 2963 page = find_or_create_page(mapping, page_index, GFP_NOFS);
2964 2964
2965 /* 2965 /*
2966 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page 2966 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
@@ -3179,7 +3179,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
3179 if (map_end > end) 3179 if (map_end > end)
3180 map_end = end; 3180 map_end = end;
3181 3181
3182 page = grab_cache_page(context->inode->i_mapping, page_index); 3182 page = find_or_create_page(context->inode->i_mapping,
3183 page_index, GFP_NOFS);
3183 BUG_ON(!page); 3184 BUG_ON(!page);
3184 3185
3185 wait_on_page_writeback(page); 3186 wait_on_page_writeback(page);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a8e6a95a353f..8a286f54dca1 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -57,11 +57,28 @@ struct ocfs2_suballoc_result {
57 u64 sr_bg_blkno; /* The bg we allocated from. Set 57 u64 sr_bg_blkno; /* The bg we allocated from. Set
58 to 0 when a block group is 58 to 0 when a block group is
59 contiguous. */ 59 contiguous. */
60 u64 sr_bg_stable_blkno; /*
61 * Doesn't change, always
62 * set to target block
63 * group descriptor
64 * block.
65 */
60 u64 sr_blkno; /* The first allocated block */ 66 u64 sr_blkno; /* The first allocated block */
61 unsigned int sr_bit_offset; /* The bit in the bg */ 67 unsigned int sr_bit_offset; /* The bit in the bg */
62 unsigned int sr_bits; /* How many bits we claimed */ 68 unsigned int sr_bits; /* How many bits we claimed */
63}; 69};
64 70
71static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
72{
73 if (res->sr_blkno == 0)
74 return 0;
75
76 if (res->sr_bg_blkno)
77 return res->sr_bg_blkno;
78
79 return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
80}
81
65static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 82static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
66static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 83static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
67static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 84static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -138,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
138 brelse(ac->ac_bh); 155 brelse(ac->ac_bh);
139 ac->ac_bh = NULL; 156 ac->ac_bh = NULL;
140 ac->ac_resv = NULL; 157 ac->ac_resv = NULL;
158 if (ac->ac_find_loc_priv) {
159 kfree(ac->ac_find_loc_priv);
160 ac->ac_find_loc_priv = NULL;
161 }
141} 162}
142 163
143void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 164void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -1678,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1678 if (!ret) 1699 if (!ret)
1679 ocfs2_bg_discontig_fix_result(ac, gd, res); 1700 ocfs2_bg_discontig_fix_result(ac, gd, res);
1680 1701
1702 /*
1703 * sr_bg_blkno might have been changed by
1704 * ocfs2_bg_discontig_fix_result
1705 */
1706 res->sr_bg_stable_blkno = group_bh->b_blocknr;
1707
1708 if (ac->ac_find_loc_only)
1709 goto out_loc_only;
1710
1681 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, 1711 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1682 res->sr_bits, 1712 res->sr_bits,
1683 le16_to_cpu(gd->bg_chain)); 1713 le16_to_cpu(gd->bg_chain));
@@ -1691,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1691 if (ret < 0) 1721 if (ret < 0)
1692 mlog_errno(ret); 1722 mlog_errno(ret);
1693 1723
1724out_loc_only:
1694 *bits_left = le16_to_cpu(gd->bg_free_bits_count); 1725 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1695 1726
1696out: 1727out:
@@ -1708,7 +1739,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1708{ 1739{
1709 int status; 1740 int status;
1710 u16 chain; 1741 u16 chain;
1711 u32 tmp_used;
1712 u64 next_group; 1742 u64 next_group;
1713 struct inode *alloc_inode = ac->ac_inode; 1743 struct inode *alloc_inode = ac->ac_inode;
1714 struct buffer_head *group_bh = NULL; 1744 struct buffer_head *group_bh = NULL;
@@ -1770,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1770 if (!status) 1800 if (!status)
1771 ocfs2_bg_discontig_fix_result(ac, bg, res); 1801 ocfs2_bg_discontig_fix_result(ac, bg, res);
1772 1802
1803 /*
1804 * sr_bg_blkno might have been changed by
1805 * ocfs2_bg_discontig_fix_result
1806 */
1807 res->sr_bg_stable_blkno = group_bh->b_blocknr;
1773 1808
1774 /* 1809 /*
1775 * Keep track of previous block descriptor read. When 1810 * Keep track of previous block descriptor read. When
@@ -1796,22 +1831,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1796 } 1831 }
1797 } 1832 }
1798 1833
1799 /* Ok, claim our bits now: set the info on dinode, chainlist 1834 if (ac->ac_find_loc_only)
1800 * and then the group */ 1835 goto out_loc_only;
1801 status = ocfs2_journal_access_di(handle, 1836
1802 INODE_CACHE(alloc_inode), 1837 status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
1803 ac->ac_bh, 1838 ac->ac_bh, res->sr_bits,
1804 OCFS2_JOURNAL_ACCESS_WRITE); 1839 chain);
1805 if (status < 0) { 1840 if (status) {
1806 mlog_errno(status); 1841 mlog_errno(status);
1807 goto bail; 1842 goto bail;
1808 } 1843 }
1809 1844
1810 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1811 fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
1812 le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
1813 ocfs2_journal_dirty(handle, ac->ac_bh);
1814
1815 status = ocfs2_block_group_set_bits(handle, 1845 status = ocfs2_block_group_set_bits(handle,
1816 alloc_inode, 1846 alloc_inode,
1817 bg, 1847 bg,
@@ -1826,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1826 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, 1856 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
1827 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1857 (unsigned long long)le64_to_cpu(fe->i_blkno));
1828 1858
1859out_loc_only:
1829 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1860 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1830bail: 1861bail:
1831 brelse(group_bh); 1862 brelse(group_bh);
@@ -1845,6 +1876,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1845 int status; 1876 int status;
1846 u16 victim, i; 1877 u16 victim, i;
1847 u16 bits_left = 0; 1878 u16 bits_left = 0;
1879 u64 hint = ac->ac_last_group;
1848 struct ocfs2_chain_list *cl; 1880 struct ocfs2_chain_list *cl;
1849 struct ocfs2_dinode *fe; 1881 struct ocfs2_dinode *fe;
1850 1882
@@ -1872,7 +1904,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1872 goto bail; 1904 goto bail;
1873 } 1905 }
1874 1906
1875 res->sr_bg_blkno = ac->ac_last_group; 1907 res->sr_bg_blkno = hint;
1876 if (res->sr_bg_blkno) { 1908 if (res->sr_bg_blkno) {
1877 /* Attempt to short-circuit the usual search mechanism 1909 /* Attempt to short-circuit the usual search mechanism
1878 * by jumping straight to the most recently used 1910 * by jumping straight to the most recently used
@@ -1896,8 +1928,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1896 1928
1897 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1929 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1898 res, &bits_left); 1930 res, &bits_left);
1899 if (!status) 1931 if (!status) {
1932 hint = ocfs2_group_from_res(res);
1900 goto set_hint; 1933 goto set_hint;
1934 }
1901 if (status < 0 && status != -ENOSPC) { 1935 if (status < 0 && status != -ENOSPC) {
1902 mlog_errno(status); 1936 mlog_errno(status);
1903 goto bail; 1937 goto bail;
@@ -1920,8 +1954,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1920 ac->ac_chain = i; 1954 ac->ac_chain = i;
1921 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1955 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1922 res, &bits_left); 1956 res, &bits_left);
1923 if (!status) 1957 if (!status) {
1958 hint = ocfs2_group_from_res(res);
1924 break; 1959 break;
1960 }
1925 if (status < 0 && status != -ENOSPC) { 1961 if (status < 0 && status != -ENOSPC) {
1926 mlog_errno(status); 1962 mlog_errno(status);
1927 goto bail; 1963 goto bail;
@@ -1936,7 +1972,7 @@ set_hint:
1936 if (bits_left < min_bits) 1972 if (bits_left < min_bits)
1937 ac->ac_last_group = 0; 1973 ac->ac_last_group = 0;
1938 else 1974 else
1939 ac->ac_last_group = res->sr_bg_blkno; 1975 ac->ac_last_group = hint;
1940 } 1976 }
1941 1977
1942bail: 1978bail:
@@ -2016,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
2016 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; 2052 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
2017} 2053}
2018 2054
2055int ocfs2_find_new_inode_loc(struct inode *dir,
2056 struct buffer_head *parent_fe_bh,
2057 struct ocfs2_alloc_context *ac,
2058 u64 *fe_blkno)
2059{
2060 int ret;
2061 handle_t *handle = NULL;
2062 struct ocfs2_suballoc_result *res;
2063
2064 BUG_ON(!ac);
2065 BUG_ON(ac->ac_bits_given != 0);
2066 BUG_ON(ac->ac_bits_wanted != 1);
2067 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
2068
2069 res = kzalloc(sizeof(*res), GFP_NOFS);
2070 if (res == NULL) {
2071 ret = -ENOMEM;
2072 mlog_errno(ret);
2073 goto out;
2074 }
2075
2076 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
2077
2078 /*
2079 * The handle started here is for chain relink. Alternatively,
2080 * we could just disable relink for these calls.
2081 */
2082 handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
2083 if (IS_ERR(handle)) {
2084 ret = PTR_ERR(handle);
2085 handle = NULL;
2086 mlog_errno(ret);
2087 goto out;
2088 }
2089
2090 /*
2091 * This will instruct ocfs2_claim_suballoc_bits and
2092 * ocfs2_search_one_group to search but save actual allocation
2093 * for later.
2094 */
2095 ac->ac_find_loc_only = 1;
2096
2097 ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
2098 if (ret < 0) {
2099 mlog_errno(ret);
2100 goto out;
2101 }
2102
2103 ac->ac_find_loc_priv = res;
2104 *fe_blkno = res->sr_blkno;
2105
2106out:
2107 if (handle)
2108 ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
2109
2110 if (ret)
2111 kfree(res);
2112
2113 return ret;
2114}
2115
2116int ocfs2_claim_new_inode_at_loc(handle_t *handle,
2117 struct inode *dir,
2118 struct ocfs2_alloc_context *ac,
2119 u64 *suballoc_loc,
2120 u16 *suballoc_bit,
2121 u64 di_blkno)
2122{
2123 int ret;
2124 u16 chain;
2125 struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
2126 struct buffer_head *bg_bh = NULL;
2127 struct ocfs2_group_desc *bg;
2128 struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
2129
2130 /*
2131 * Since di_blkno is being passed back in, we check for any
2132 * inconsistencies which may have happened between
2133 * calls. These are code bugs as di_blkno is not expected to
2134 * change once returned from ocfs2_find_new_inode_loc()
2135 */
2136 BUG_ON(res->sr_blkno != di_blkno);
2137
2138 ret = ocfs2_read_group_descriptor(ac->ac_inode, di,
2139 res->sr_bg_stable_blkno, &bg_bh);
2140 if (ret) {
2141 mlog_errno(ret);
2142 goto out;
2143 }
2144
2145 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
2146 chain = le16_to_cpu(bg->bg_chain);
2147
2148 ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
2149 ac->ac_bh, res->sr_bits,
2150 chain);
2151 if (ret) {
2152 mlog_errno(ret);
2153 goto out;
2154 }
2155
2156 ret = ocfs2_block_group_set_bits(handle,
2157 ac->ac_inode,
2158 bg,
2159 bg_bh,
2160 res->sr_bit_offset,
2161 res->sr_bits);
2162 if (ret < 0) {
2163 mlog_errno(ret);
2164 goto out;
2165 }
2166
2167 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
2168 (unsigned long long)di_blkno);
2169
2170 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
2171
2172 BUG_ON(res->sr_bits != 1);
2173
2174 *suballoc_loc = res->sr_bg_blkno;
2175 *suballoc_bit = res->sr_bit_offset;
2176 ac->ac_bits_given++;
2177 ocfs2_save_inode_ac_group(dir, ac);
2178
2179out:
2180 brelse(bg_bh);
2181
2182 return ret;
2183}
2184
2019int ocfs2_claim_new_inode(handle_t *handle, 2185int ocfs2_claim_new_inode(handle_t *handle,
2020 struct inode *dir, 2186 struct inode *dir,
2021 struct buffer_head *parent_fe_bh, 2187 struct buffer_head *parent_fe_bh,
@@ -2567,7 +2733,8 @@ out:
2567 * suballoc_bit. 2733 * suballoc_bit.
2568 */ 2734 */
2569static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, 2735static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2570 u16 *suballoc_slot, u16 *suballoc_bit) 2736 u16 *suballoc_slot, u64 *group_blkno,
2737 u16 *suballoc_bit)
2571{ 2738{
2572 int status; 2739 int status;
2573 struct buffer_head *inode_bh = NULL; 2740 struct buffer_head *inode_bh = NULL;
@@ -2604,6 +2771,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2604 *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); 2771 *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
2605 if (suballoc_bit) 2772 if (suballoc_bit)
2606 *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); 2773 *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
2774 if (group_blkno)
2775 *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc);
2607 2776
2608bail: 2777bail:
2609 brelse(inode_bh); 2778 brelse(inode_bh);
@@ -2621,7 +2790,8 @@ bail:
2621 */ 2790 */
2622static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, 2791static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2623 struct inode *suballoc, 2792 struct inode *suballoc,
2624 struct buffer_head *alloc_bh, u64 blkno, 2793 struct buffer_head *alloc_bh,
2794 u64 group_blkno, u64 blkno,
2625 u16 bit, int *res) 2795 u16 bit, int *res)
2626{ 2796{
2627 struct ocfs2_dinode *alloc_di; 2797 struct ocfs2_dinode *alloc_di;
@@ -2642,10 +2812,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2642 goto bail; 2812 goto bail;
2643 } 2813 }
2644 2814
2645 if (alloc_di->i_suballoc_loc) 2815 bg_blkno = group_blkno ? group_blkno :
2646 bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); 2816 ocfs2_which_suballoc_group(blkno, bit);
2647 else
2648 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2649 status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, 2817 status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
2650 &group_bh); 2818 &group_bh);
2651 if (status < 0) { 2819 if (status < 0) {
@@ -2680,6 +2848,7 @@ bail:
2680int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) 2848int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2681{ 2849{
2682 int status; 2850 int status;
2851 u64 group_blkno = 0;
2683 u16 suballoc_bit = 0, suballoc_slot = 0; 2852 u16 suballoc_bit = 0, suballoc_slot = 0;
2684 struct inode *inode_alloc_inode; 2853 struct inode *inode_alloc_inode;
2685 struct buffer_head *alloc_bh = NULL; 2854 struct buffer_head *alloc_bh = NULL;
@@ -2687,7 +2856,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2687 mlog_entry("blkno: %llu", (unsigned long long)blkno); 2856 mlog_entry("blkno: %llu", (unsigned long long)blkno);
2688 2857
2689 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, 2858 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2690 &suballoc_bit); 2859 &group_blkno, &suballoc_bit);
2691 if (status < 0) { 2860 if (status < 0) {
2692 mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); 2861 mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
2693 goto bail; 2862 goto bail;
@@ -2715,7 +2884,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2715 } 2884 }
2716 2885
2717 status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, 2886 status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
2718 blkno, suballoc_bit, res); 2887 group_blkno, blkno, suballoc_bit, res);
2719 if (status < 0) 2888 if (status < 0)
2720 mlog(ML_ERROR, "test suballoc bit failed %d\n", status); 2889 mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
2721 2890
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a017dd3ee7d9..b8afabfeede4 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context {
56 u64 ac_max_block; /* Highest block number to allocate. 0 is 56 u64 ac_max_block; /* Highest block number to allocate. 0 is
57 is the same as ~0 - unlimited */ 57 is the same as ~0 - unlimited */
58 58
59 int ac_find_loc_only; /* hack for reflink operation ordering */
60 struct ocfs2_suballoc_result *ac_find_loc_priv; /* */
61
59 struct ocfs2_alloc_reservation *ac_resv; 62 struct ocfs2_alloc_reservation *ac_resv;
60}; 63};
61 64
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
197 struct ocfs2_alloc_context **meta_ac); 200 struct ocfs2_alloc_context **meta_ac);
198 201
199int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); 202int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
203
204
205
206/*
207 * The following two interfaces are for ocfs2_create_inode_in_orphan().
208 */
209int ocfs2_find_new_inode_loc(struct inode *dir,
210 struct buffer_head *parent_fe_bh,
211 struct ocfs2_alloc_context *ac,
212 u64 *fe_blkno);
213
214int ocfs2_claim_new_inode_at_loc(handle_t *handle,
215 struct inode *dir,
216 struct ocfs2_alloc_context *ac,
217 u64 *suballoc_loc,
218 u16 *suballoc_bit,
219 u64 di_blkno);
220
200#endif /* _CHAINALLOC_H_ */ 221#endif /* _CHAINALLOC_H_ */
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 180cf5a0bd67..3b8b45660331 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page)
146 u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); 146 u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
147#endif 147#endif
148 148
149#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR 149#ifdef CONFIG_ARCH_USES_PG_UNCACHED
150 u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); 150 u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached);
151#endif 151#endif
152 152
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 439fc1f1c1c4..271afc48b9a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
224 /* We don't show the stack guard page in /proc/maps */ 224 /* We don't show the stack guard page in /proc/maps */
225 start = vma->vm_start; 225 start = vma->vm_start;
226 if (vma->vm_flags & VM_GROWSDOWN) 226 if (vma->vm_flags & VM_GROWSDOWN)
227 start += PAGE_SIZE; 227 if (!vma_stack_continue(vma->vm_prev, vma->vm_start))
228 start += PAGE_SIZE;
228 229
229 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 230 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
230 start, 231 start,
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1b27b5688f62..da3fefe91a8f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
340 char *p; 340 char *p;
341 341
342 p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); 342 p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
343 if (p) 343 if (!IS_ERR(p))
344 memmove(last_sysfs_file, p, strlen(p) + 1); 344 memmove(last_sysfs_file, p, strlen(p) + 1);
345 345
346 /* need attr_sd for attr and ops, its parent for kobj */ 346 /* need attr_sd for attr and ops, its parent for kobj */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 15412fe15c3a..b552f816de15 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -852,8 +852,8 @@ xfs_convert_page(
852 SetPageUptodate(page); 852 SetPageUptodate(page);
853 853
854 if (count) { 854 if (count) {
855 wbc->nr_to_write--; 855 if (--wbc->nr_to_write <= 0 &&
856 if (wbc->nr_to_write <= 0) 856 wbc->sync_mode == WB_SYNC_NONE)
857 done = 1; 857 done = 1;
858 } 858 }
859 xfs_start_page_writeback(page, !page_dirty, count); 859 xfs_start_page_writeback(page, !page_dirty, count);
@@ -1068,7 +1068,7 @@ xfs_vm_writepage(
1068 * by themselves. 1068 * by themselves.
1069 */ 1069 */
1070 if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) 1070 if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
1071 goto out_fail; 1071 goto redirty;
1072 1072
1073 /* 1073 /*
1074 * We need a transaction if there are delalloc or unwritten buffers 1074 * We need a transaction if there are delalloc or unwritten buffers
@@ -1080,7 +1080,7 @@ xfs_vm_writepage(
1080 */ 1080 */
1081 xfs_count_page_state(page, &delalloc, &unwritten); 1081 xfs_count_page_state(page, &delalloc, &unwritten);
1082 if ((current->flags & PF_FSTRANS) && (delalloc || unwritten)) 1082 if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
1083 goto out_fail; 1083 goto redirty;
1084 1084
1085 /* Is this page beyond the end of the file? */ 1085 /* Is this page beyond the end of the file? */
1086 offset = i_size_read(inode); 1086 offset = i_size_read(inode);
@@ -1245,12 +1245,15 @@ error:
1245 if (iohead) 1245 if (iohead)
1246 xfs_cancel_ioend(iohead); 1246 xfs_cancel_ioend(iohead);
1247 1247
1248 if (err == -EAGAIN)
1249 goto redirty;
1250
1248 xfs_aops_discard_page(page); 1251 xfs_aops_discard_page(page);
1249 ClearPageUptodate(page); 1252 ClearPageUptodate(page);
1250 unlock_page(page); 1253 unlock_page(page);
1251 return err; 1254 return err;
1252 1255
1253out_fail: 1256redirty:
1254 redirty_page_for_writepage(wbc, page); 1257 redirty_page_for_writepage(wbc, page);
1255 unlock_page(page); 1258 unlock_page(page);
1256 return 0; 1259 return 0;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ea79072f5210..286e36e21dae 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -440,12 +440,7 @@ _xfs_buf_find(
440 ASSERT(btp == bp->b_target); 440 ASSERT(btp == bp->b_target);
441 if (bp->b_file_offset == range_base && 441 if (bp->b_file_offset == range_base &&
442 bp->b_buffer_length == range_length) { 442 bp->b_buffer_length == range_length) {
443 /*
444 * If we look at something, bring it to the
445 * front of the list for next time.
446 */
447 atomic_inc(&bp->b_hold); 443 atomic_inc(&bp->b_hold);
448 list_move(&bp->b_hash_list, &hash->bh_list);
449 goto found; 444 goto found;
450 } 445 }
451 } 446 }
@@ -1443,8 +1438,7 @@ xfs_alloc_bufhash(
1443{ 1438{
1444 unsigned int i; 1439 unsigned int i;
1445 1440
1446 btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ 1441 btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */
1447 btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
1448 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * 1442 btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
1449 sizeof(xfs_bufhash_t)); 1443 sizeof(xfs_bufhash_t));
1450 for (i = 0; i < (1 << btp->bt_hashshift); i++) { 1444 for (i = 0; i < (1 << btp->bt_hashshift); i++) {
@@ -1938,7 +1932,8 @@ xfs_buf_init(void)
1938 if (!xfs_buf_zone) 1932 if (!xfs_buf_zone)
1939 goto out; 1933 goto out;
1940 1934
1941 xfslogd_workqueue = create_workqueue("xfslogd"); 1935 xfslogd_workqueue = alloc_workqueue("xfslogd",
1936 WQ_RESCUER | WQ_HIGHPRI, 1);
1942 if (!xfslogd_workqueue) 1937 if (!xfslogd_workqueue)
1943 goto out_free_buf_zone; 1938 goto out_free_buf_zone;
1944 1939
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index d072e5ff923b..2a05614f0b92 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -137,7 +137,6 @@ typedef struct xfs_buftarg {
137 size_t bt_smask; 137 size_t bt_smask;
138 138
139 /* per device buffer hash table */ 139 /* per device buffer hash table */
140 uint bt_hashmask;
141 uint bt_hashshift; 140 uint bt_hashshift;
142 xfs_bufhash_t *bt_hash; 141 xfs_bufhash_t *bt_hash;
143 142
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 237f5ffb2ee8..3b9e626f7cd1 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr(
785{ 785{
786 struct fsxattr fa; 786 struct fsxattr fa;
787 787
788 memset(&fa, 0, sizeof(struct fsxattr));
789
788 xfs_ilock(ip, XFS_ILOCK_SHARED); 790 xfs_ilock(ip, XFS_ILOCK_SHARED);
789 fa.fsx_xflags = xfs_ip2xflags(ip); 791 fa.fsx_xflags = xfs_ip2xflags(ip);
790 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; 792 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
@@ -907,6 +909,13 @@ xfs_ioctl_setattr(
907 return XFS_ERROR(EIO); 909 return XFS_ERROR(EIO);
908 910
909 /* 911 /*
912 * Disallow 32bit project ids because on-disk structure
913 * is 16bit only.
914 */
915 if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
916 return XFS_ERROR(EINVAL);
917
918 /*
910 * If disk quotas is on, we make sure that the dquots do exist on disk, 919 * If disk quotas is on, we make sure that the dquots do exist on disk,
911 * before we start any other transactions. Trying to do this later 920 * before we start any other transactions. Trying to do this later
912 * is messy. We don't care to take a readlock to look at the ids 921 * is messy. We don't care to take a readlock to look at the ids
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 68be25dcd301..b1fc2a6bfe83 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -664,7 +664,7 @@ xfs_vn_fiemap(
664 fieinfo->fi_extents_max + 1; 664 fieinfo->fi_extents_max + 1;
665 bm.bmv_count = min_t(__s32, bm.bmv_count, 665 bm.bmv_count = min_t(__s32, bm.bmv_count,
666 (PAGE_SIZE * 16 / sizeof(struct getbmapx))); 666 (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
667 bm.bmv_iflags = BMV_IF_PREALLOC; 667 bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
668 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) 668 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
669 bm.bmv_iflags |= BMV_IF_ATTRFORK; 669 bm.bmv_iflags |= BMV_IF_ATTRFORK;
670 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) 670 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 15c35b62ff14..a4e07974955b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1226,6 +1226,7 @@ xfs_fs_statfs(
1226 struct xfs_inode *ip = XFS_I(dentry->d_inode); 1226 struct xfs_inode *ip = XFS_I(dentry->d_inode);
1227 __uint64_t fakeinos, id; 1227 __uint64_t fakeinos, id;
1228 xfs_extlen_t lsize; 1228 xfs_extlen_t lsize;
1229 __int64_t ffree;
1229 1230
1230 statp->f_type = XFS_SB_MAGIC; 1231 statp->f_type = XFS_SB_MAGIC;
1231 statp->f_namelen = MAXNAMELEN - 1; 1232 statp->f_namelen = MAXNAMELEN - 1;
@@ -1249,7 +1250,11 @@ xfs_fs_statfs(
1249 statp->f_files = min_t(typeof(statp->f_files), 1250 statp->f_files = min_t(typeof(statp->f_files),
1250 statp->f_files, 1251 statp->f_files,
1251 mp->m_maxicount); 1252 mp->m_maxicount);
1252 statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1253
1254 /* make sure statp->f_ffree does not underflow */
1255 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
1256 statp->f_ffree = max_t(__int64_t, ffree, 0);
1257
1253 spin_unlock(&mp->m_sb_lock); 1258 spin_unlock(&mp->m_sb_lock);
1254 1259
1255 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || 1260 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1402,7 +1407,7 @@ xfs_fs_freeze(
1402 1407
1403 xfs_save_resvblks(mp); 1408 xfs_save_resvblks(mp);
1404 xfs_quiesce_attr(mp); 1409 xfs_quiesce_attr(mp);
1405 return -xfs_fs_log_dummy(mp); 1410 return -xfs_fs_log_dummy(mp, SYNC_WAIT);
1406} 1411}
1407 1412
1408STATIC int 1413STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index dfcbd98d1599..d59c4a65d492 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -34,6 +34,7 @@
34#include "xfs_inode_item.h" 34#include "xfs_inode_item.h"
35#include "xfs_quota.h" 35#include "xfs_quota.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_fsops.h"
37 38
38#include <linux/kthread.h> 39#include <linux/kthread.h>
39#include <linux/freezer.h> 40#include <linux/freezer.h>
@@ -341,38 +342,6 @@ xfs_sync_attr(
341} 342}
342 343
343STATIC int 344STATIC int
344xfs_commit_dummy_trans(
345 struct xfs_mount *mp,
346 uint flags)
347{
348 struct xfs_inode *ip = mp->m_rootip;
349 struct xfs_trans *tp;
350 int error;
351
352 /*
353 * Put a dummy transaction in the log to tell recovery
354 * that all others are OK.
355 */
356 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
357 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
358 if (error) {
359 xfs_trans_cancel(tp, 0);
360 return error;
361 }
362
363 xfs_ilock(ip, XFS_ILOCK_EXCL);
364
365 xfs_trans_ijoin(tp, ip);
366 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
367 error = xfs_trans_commit(tp, 0);
368 xfs_iunlock(ip, XFS_ILOCK_EXCL);
369
370 /* the log force ensures this transaction is pushed to disk */
371 xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
372 return error;
373}
374
375STATIC int
376xfs_sync_fsdata( 345xfs_sync_fsdata(
377 struct xfs_mount *mp) 346 struct xfs_mount *mp)
378{ 347{
@@ -432,7 +401,7 @@ xfs_quiesce_data(
432 401
433 /* mark the log as covered if needed */ 402 /* mark the log as covered if needed */
434 if (xfs_log_need_covered(mp)) 403 if (xfs_log_need_covered(mp))
435 error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT); 404 error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
436 405
437 /* flush data-only devices */ 406 /* flush data-only devices */
438 if (mp->m_rtdev_targp) 407 if (mp->m_rtdev_targp)
@@ -563,7 +532,7 @@ xfs_flush_inodes(
563/* 532/*
564 * Every sync period we need to unpin all items, reclaim inodes and sync 533 * Every sync period we need to unpin all items, reclaim inodes and sync
565 * disk quotas. We might need to cover the log to indicate that the 534 * disk quotas. We might need to cover the log to indicate that the
566 * filesystem is idle. 535 * filesystem is idle and not frozen.
567 */ 536 */
568STATIC void 537STATIC void
569xfs_sync_worker( 538xfs_sync_worker(
@@ -577,8 +546,9 @@ xfs_sync_worker(
577 xfs_reclaim_inodes(mp, 0); 546 xfs_reclaim_inodes(mp, 0);
578 /* dgc: errors ignored here */ 547 /* dgc: errors ignored here */
579 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 548 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
580 if (xfs_log_need_covered(mp)) 549 if (mp->m_super->s_frozen == SB_UNFROZEN &&
581 error = xfs_commit_dummy_trans(mp, 0); 550 xfs_log_need_covered(mp))
551 error = xfs_fs_log_dummy(mp, 0);
582 } 552 }
583 mp->m_sync_seq++; 553 mp->m_sync_seq++;
584 wake_up(&mp->m_wait_single_sync_task); 554 wake_up(&mp->m_wait_single_sync_task);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 23f14e595c18..f90dadd5a968 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5533,12 +5533,24 @@ xfs_getbmap(
5533 map[i].br_startblock)) 5533 map[i].br_startblock))
5534 goto out_free_map; 5534 goto out_free_map;
5535 5535
5536 nexleft--;
5537 bmv->bmv_offset = 5536 bmv->bmv_offset =
5538 out[cur_ext].bmv_offset + 5537 out[cur_ext].bmv_offset +
5539 out[cur_ext].bmv_length; 5538 out[cur_ext].bmv_length;
5540 bmv->bmv_length = 5539 bmv->bmv_length =
5541 max_t(__int64_t, 0, bmvend - bmv->bmv_offset); 5540 max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
5541
5542 /*
5543 * In case we don't want to return the hole,
5544 * don't increase cur_ext so that we can reuse
5545 * it in the next loop.
5546 */
5547 if ((iflags & BMV_IF_NO_HOLES) &&
5548 map[i].br_startblock == HOLESTARTBLOCK) {
5549 memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
5550 continue;
5551 }
5552
5553 nexleft--;
5542 bmv->bmv_entries++; 5554 bmv->bmv_entries++;
5543 cur_ext++; 5555 cur_ext++;
5544 } 5556 }
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7cf7220e7d5f..87c2e9d02288 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -114,8 +114,10 @@ struct getbmapx {
114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ 114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */
115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ 115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */
116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ 116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */
117#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */
117#define BMV_IF_VALID \ 118#define BMV_IF_VALID \
118 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) 119 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \
120 BMV_IF_DELALLOC|BMV_IF_NO_HOLES)
119 121
120/* bmv_oflags values - returned for each non-header segment */ 122/* bmv_oflags values - returned for each non-header segment */
121#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ 123#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index dbca5f5c37ba..43b1d5699335 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -604,31 +604,36 @@ out:
604 return 0; 604 return 0;
605} 605}
606 606
607/*
608 * Dump a transaction into the log that contains no real change. This is needed
609 * to be able to make the log dirty or stamp the current tail LSN into the log
610 * during the covering operation.
611 *
612 * We cannot use an inode here for this - that will push dirty state back up
613 * into the VFS and then periodic inode flushing will prevent log covering from
614 * making progress. Hence we log a field in the superblock instead.
615 */
607int 616int
608xfs_fs_log_dummy( 617xfs_fs_log_dummy(
609 xfs_mount_t *mp) 618 xfs_mount_t *mp,
619 int flags)
610{ 620{
611 xfs_trans_t *tp; 621 xfs_trans_t *tp;
612 xfs_inode_t *ip;
613 int error; 622 int error;
614 623
615 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 624 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
616 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 625 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
626 XFS_DEFAULT_LOG_COUNT);
617 if (error) { 627 if (error) {
618 xfs_trans_cancel(tp, 0); 628 xfs_trans_cancel(tp, 0);
619 return error; 629 return error;
620 } 630 }
621 631
622 ip = mp->m_rootip; 632 /* log the UUID because it is an unchanging field */
623 xfs_ilock(ip, XFS_ILOCK_EXCL); 633 xfs_mod_sb(tp, XFS_SB_UUID);
624 634 if (flags & SYNC_WAIT)
625 xfs_trans_ijoin(tp, ip); 635 xfs_trans_set_sync(tp);
626 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 636 return xfs_trans_commit(tp, 0);
627 xfs_trans_set_sync(tp);
628 error = xfs_trans_commit(tp, 0);
629
630 xfs_iunlock(ip, XFS_ILOCK_EXCL);
631 return error;
632} 637}
633 638
634int 639int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 88435e0a77c9..a786c5212c1e 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
26 xfs_fsop_resblks_t *outval); 26 xfs_fsop_resblks_t *outval);
27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
28extern int xfs_fs_log_dummy(xfs_mount_t *mp); 28extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
29 29
30#endif /* __XFS_FSOPS_H__ */ 30#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index abf80ae1e95b..5371d2dc360e 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1213,7 +1213,6 @@ xfs_imap_lookup(
1213 struct xfs_inobt_rec_incore rec; 1213 struct xfs_inobt_rec_incore rec;
1214 struct xfs_btree_cur *cur; 1214 struct xfs_btree_cur *cur;
1215 struct xfs_buf *agbp; 1215 struct xfs_buf *agbp;
1216 xfs_agino_t startino;
1217 int error; 1216 int error;
1218 int i; 1217 int i;
1219 1218
@@ -1227,13 +1226,13 @@ xfs_imap_lookup(
1227 } 1226 }
1228 1227
1229 /* 1228 /*
1230 * derive and lookup the exact inode record for the given agino. If the 1229 * Lookup the inode record for the given agino. If the record cannot be
1231 * record cannot be found, then it's an invalid inode number and we 1230 * found, then it's an invalid inode number and we should abort. Once
1232 * should abort. 1231 * we have a record, we need to ensure it contains the inode number
1232 * we are looking up.
1233 */ 1233 */
1234 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); 1234 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1235 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1); 1235 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1236 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1237 if (!error) { 1236 if (!error) {
1238 if (i) 1237 if (i)
1239 error = xfs_inobt_get_rec(cur, &rec, &i); 1238 error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1246,6 +1245,11 @@ xfs_imap_lookup(
1246 if (error) 1245 if (error)
1247 return error; 1246 return error;
1248 1247
1248 /* check that the returned record contains the required inode */
1249 if (rec.ir_startino > agino ||
1250 rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
1251 return EINVAL;
1252
1249 /* for untrusted inodes check it is allocated first */ 1253 /* for untrusted inodes check it is allocated first */
1250 if ((flags & XFS_IGET_UNTRUSTED) && 1254 if ((flags & XFS_IGET_UNTRUSTED) &&
1251 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) 1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 68415cb4f23c..34798f391c49 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
1914 return 0; 1914 return 0;
1915} 1915}
1916 1916
1917/*
1918 * A big issue when freeing the inode cluster is that we _cannot_ skip any
1919 * inodes that are in memory - they all must be marked stale and attached to
1920 * the cluster buffer.
1921 */
1917STATIC void 1922STATIC void
1918xfs_ifree_cluster( 1923xfs_ifree_cluster(
1919 xfs_inode_t *free_ip, 1924 xfs_inode_t *free_ip,
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
1945 } 1950 }
1946 1951
1947 for (j = 0; j < nbufs; j++, inum += ninodes) { 1952 for (j = 0; j < nbufs; j++, inum += ninodes) {
1948 int found = 0;
1949
1950 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 1953 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1951 XFS_INO_TO_AGBNO(mp, inum)); 1954 XFS_INO_TO_AGBNO(mp, inum));
1952 1955
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
1965 /* 1968 /*
1966 * Walk the inodes already attached to the buffer and mark them 1969 * Walk the inodes already attached to the buffer and mark them
1967 * stale. These will all have the flush locks held, so an 1970 * stale. These will all have the flush locks held, so an
1968 * in-memory inode walk can't lock them. 1971 * in-memory inode walk can't lock them. By marking them all
1972 * stale first, we will not attempt to lock them in the loop
1973 * below as the XFS_ISTALE flag will be set.
1969 */ 1974 */
1970 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 1975 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
1971 while (lip) { 1976 while (lip) {
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
1977 &iip->ili_flush_lsn, 1982 &iip->ili_flush_lsn,
1978 &iip->ili_item.li_lsn); 1983 &iip->ili_item.li_lsn);
1979 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 1984 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
1980 found++;
1981 } 1985 }
1982 lip = lip->li_bio_list; 1986 lip = lip->li_bio_list;
1983 } 1987 }
1984 1988
1989
1985 /* 1990 /*
1986 * For each inode in memory attempt to add it to the inode 1991 * For each inode in memory attempt to add it to the inode
1987 * buffer and set it up for being staled on buffer IO 1992 * buffer and set it up for being staled on buffer IO
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
1993 * even trying to lock them. 1998 * even trying to lock them.
1994 */ 1999 */
1995 for (i = 0; i < ninodes; i++) { 2000 for (i = 0; i < ninodes; i++) {
2001retry:
1996 read_lock(&pag->pag_ici_lock); 2002 read_lock(&pag->pag_ici_lock);
1997 ip = radix_tree_lookup(&pag->pag_ici_root, 2003 ip = radix_tree_lookup(&pag->pag_ici_root,
1998 XFS_INO_TO_AGINO(mp, (inum + i))); 2004 XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
2003 continue; 2009 continue;
2004 } 2010 }
2005 2011
2006 /* don't try to lock/unlock the current inode */ 2012 /*
2013 * Don't try to lock/unlock the current inode, but we
2014 * _cannot_ skip the other inodes that we did not find
2015 * in the list attached to the buffer and are not
2016 * already marked stale. If we can't lock it, back off
2017 * and retry.
2018 */
2007 if (ip != free_ip && 2019 if (ip != free_ip &&
2008 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2020 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2009 read_unlock(&pag->pag_ici_lock); 2021 read_unlock(&pag->pag_ici_lock);
2010 continue; 2022 delay(1);
2023 goto retry;
2011 } 2024 }
2012 read_unlock(&pag->pag_ici_lock); 2025 read_unlock(&pag->pag_ici_lock);
2013 2026
2014 if (!xfs_iflock_nowait(ip)) { 2027 xfs_iflock(ip);
2015 if (ip != free_ip)
2016 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2017 continue;
2018 }
2019
2020 xfs_iflags_set(ip, XFS_ISTALE); 2028 xfs_iflags_set(ip, XFS_ISTALE);
2021 if (xfs_inode_clean(ip)) {
2022 ASSERT(ip != free_ip);
2023 xfs_ifunlock(ip);
2024 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2025 continue;
2026 }
2027 2029
2030 /*
2031 * we don't need to attach clean inodes or those only
2032 * with unlogged changes (which we throw away, anyway).
2033 */
2028 iip = ip->i_itemp; 2034 iip = ip->i_itemp;
2029 if (!iip) { 2035 if (!iip || xfs_inode_clean(ip)) {
2030 /* inode with unlogged changes only */
2031 ASSERT(ip != free_ip); 2036 ASSERT(ip != free_ip);
2032 ip->i_update_core = 0; 2037 ip->i_update_core = 0;
2033 xfs_ifunlock(ip); 2038 xfs_ifunlock(ip);
2034 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2039 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2035 continue; 2040 continue;
2036 } 2041 }
2037 found++;
2038 2042
2039 iip->ili_last_fields = iip->ili_format.ilf_fields; 2043 iip->ili_last_fields = iip->ili_format.ilf_fields;
2040 iip->ili_format.ilf_fields = 0; 2044 iip->ili_format.ilf_fields = 0;
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster(
2049 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2053 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2050 } 2054 }
2051 2055
2052 if (found) 2056 xfs_trans_stale_inode_buf(tp, bp);
2053 xfs_trans_stale_inode_buf(tp, bp);
2054 xfs_trans_binval(tp, bp); 2057 xfs_trans_binval(tp, bp);
2055 } 2058 }
2056 2059
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 925d572bf0f4..33f718f92a48 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3015,7 +3015,8 @@ _xfs_log_force(
3015 3015
3016 XFS_STATS_INC(xs_log_force); 3016 XFS_STATS_INC(xs_log_force);
3017 3017
3018 xlog_cil_push(log, 1); 3018 if (log->l_cilp)
3019 xlog_cil_force(log);
3019 3020
3020 spin_lock(&log->l_icloglock); 3021 spin_lock(&log->l_icloglock);
3021 3022
@@ -3167,7 +3168,7 @@ _xfs_log_force_lsn(
3167 XFS_STATS_INC(xs_log_force); 3168 XFS_STATS_INC(xs_log_force);
3168 3169
3169 if (log->l_cilp) { 3170 if (log->l_cilp) {
3170 lsn = xlog_cil_push_lsn(log, lsn); 3171 lsn = xlog_cil_force_lsn(log, lsn);
3171 if (lsn == NULLCOMMITLSN) 3172 if (lsn == NULLCOMMITLSN)
3172 return 0; 3173 return 0;
3173 } 3174 }
@@ -3724,7 +3725,7 @@ xfs_log_force_umount(
3724 * call below. 3725 * call below.
3725 */ 3726 */
3726 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) 3727 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
3727 xlog_cil_push(log, 1); 3728 xlog_cil_force(log);
3728 3729
3729 /* 3730 /*
3730 * We must hold both the GRANT lock and the LOG lock, 3731 * We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 31e4ea2d19ac..ed575fb4b495 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -68,6 +68,7 @@ xlog_cil_init(
68 ctx->sequence = 1; 68 ctx->sequence = 1;
69 ctx->cil = cil; 69 ctx->cil = cil;
70 cil->xc_ctx = ctx; 70 cil->xc_ctx = ctx;
71 cil->xc_current_sequence = ctx->sequence;
71 72
72 cil->xc_log = log; 73 cil->xc_log = log;
73 log->l_cilp = cil; 74 log->l_cilp = cil;
@@ -269,15 +270,10 @@ xlog_cil_insert(
269static void 270static void
270xlog_cil_format_items( 271xlog_cil_format_items(
271 struct log *log, 272 struct log *log,
272 struct xfs_log_vec *log_vector, 273 struct xfs_log_vec *log_vector)
273 struct xlog_ticket *ticket,
274 xfs_lsn_t *start_lsn)
275{ 274{
276 struct xfs_log_vec *lv; 275 struct xfs_log_vec *lv;
277 276
278 if (start_lsn)
279 *start_lsn = log->l_cilp->xc_ctx->sequence;
280
281 ASSERT(log_vector); 277 ASSERT(log_vector);
282 for (lv = log_vector; lv; lv = lv->lv_next) { 278 for (lv = log_vector; lv; lv = lv->lv_next) {
283 void *ptr; 279 void *ptr;
@@ -301,9 +297,24 @@ xlog_cil_format_items(
301 ptr += vec->i_len; 297 ptr += vec->i_len;
302 } 298 }
303 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); 299 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
300 }
301}
302
303static void
304xlog_cil_insert_items(
305 struct log *log,
306 struct xfs_log_vec *log_vector,
307 struct xlog_ticket *ticket,
308 xfs_lsn_t *start_lsn)
309{
310 struct xfs_log_vec *lv;
311
312 if (start_lsn)
313 *start_lsn = log->l_cilp->xc_ctx->sequence;
304 314
315 ASSERT(log_vector);
316 for (lv = log_vector; lv; lv = lv->lv_next)
305 xlog_cil_insert(log, ticket, lv->lv_item, lv); 317 xlog_cil_insert(log, ticket, lv->lv_item, lv);
306 }
307} 318}
308 319
309static void 320static void
@@ -321,80 +332,6 @@ xlog_cil_free_logvec(
321} 332}
322 333
323/* 334/*
324 * Commit a transaction with the given vector to the Committed Item List.
325 *
326 * To do this, we need to format the item, pin it in memory if required and
327 * account for the space used by the transaction. Once we have done that we
328 * need to release the unused reservation for the transaction, attach the
329 * transaction to the checkpoint context so we carry the busy extents through
330 * to checkpoint completion, and then unlock all the items in the transaction.
331 *
332 * For more specific information about the order of operations in
333 * xfs_log_commit_cil() please refer to the comments in
334 * xfs_trans_commit_iclog().
335 *
336 * Called with the context lock already held in read mode to lock out
337 * background commit, returns without it held once background commits are
338 * allowed again.
339 */
340int
341xfs_log_commit_cil(
342 struct xfs_mount *mp,
343 struct xfs_trans *tp,
344 struct xfs_log_vec *log_vector,
345 xfs_lsn_t *commit_lsn,
346 int flags)
347{
348 struct log *log = mp->m_log;
349 int log_flags = 0;
350 int push = 0;
351
352 if (flags & XFS_TRANS_RELEASE_LOG_RES)
353 log_flags = XFS_LOG_REL_PERM_RESERV;
354
355 if (XLOG_FORCED_SHUTDOWN(log)) {
356 xlog_cil_free_logvec(log_vector);
357 return XFS_ERROR(EIO);
358 }
359
360 /* lock out background commit */
361 down_read(&log->l_cilp->xc_ctx_lock);
362 xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
363
364 /* check we didn't blow the reservation */
365 if (tp->t_ticket->t_curr_res < 0)
366 xlog_print_tic_res(log->l_mp, tp->t_ticket);
367
368 /* attach the transaction to the CIL if it has any busy extents */
369 if (!list_empty(&tp->t_busy)) {
370 spin_lock(&log->l_cilp->xc_cil_lock);
371 list_splice_init(&tp->t_busy,
372 &log->l_cilp->xc_ctx->busy_extents);
373 spin_unlock(&log->l_cilp->xc_cil_lock);
374 }
375
376 tp->t_commit_lsn = *commit_lsn;
377 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
378 xfs_trans_unreserve_and_mod_sb(tp);
379
380 /* check for background commit before unlock */
381 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
382 push = 1;
383 up_read(&log->l_cilp->xc_ctx_lock);
384
385 /*
386 * We need to push CIL every so often so we don't cache more than we
387 * can fit in the log. The limit really is that a checkpoint can't be
388 * more than half the log (the current checkpoint is not allowed to
389 * overwrite the previous checkpoint), but commit latency and memory
390 * usage limit this to a smaller size in most cases.
391 */
392 if (push)
393 xlog_cil_push(log, 0);
394 return 0;
395}
396
397/*
398 * Mark all items committed and clear busy extents. We free the log vector 335 * Mark all items committed and clear busy extents. We free the log vector
399 * chains in a separate pass so that we unpin the log items as quickly as 336 * chains in a separate pass so that we unpin the log items as quickly as
400 * possible. 337 * possible.
@@ -427,13 +364,23 @@ xlog_cil_committed(
427} 364}
428 365
429/* 366/*
430 * Push the Committed Item List to the log. If the push_now flag is not set, 367 * Push the Committed Item List to the log. If @push_seq flag is zero, then it
431 * then it is a background flush and so we can chose to ignore it. 368 * is a background flush and so we can chose to ignore it. Otherwise, if the
369 * current sequence is the same as @push_seq we need to do a flush. If
370 * @push_seq is less than the current sequence, then it has already been
371 * flushed and we don't need to do anything - the caller will wait for it to
372 * complete if necessary.
373 *
374 * @push_seq is a value rather than a flag because that allows us to do an
375 * unlocked check of the sequence number for a match. Hence we can allow log
376 * forces to run racily and not issue pushes for the same sequence twice. If we
377 * get a race between multiple pushes for the same sequence they will block on
378 * the first one and then abort, hence avoiding needless pushes.
432 */ 379 */
433int 380STATIC int
434xlog_cil_push( 381xlog_cil_push(
435 struct log *log, 382 struct log *log,
436 int push_now) 383 xfs_lsn_t push_seq)
437{ 384{
438 struct xfs_cil *cil = log->l_cilp; 385 struct xfs_cil *cil = log->l_cilp;
439 struct xfs_log_vec *lv; 386 struct xfs_log_vec *lv;
@@ -453,12 +400,14 @@ xlog_cil_push(
453 if (!cil) 400 if (!cil)
454 return 0; 401 return 0;
455 402
403 ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
404
456 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 405 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
457 new_ctx->ticket = xlog_cil_ticket_alloc(log); 406 new_ctx->ticket = xlog_cil_ticket_alloc(log);
458 407
459 /* lock out transaction commit, but don't block on background push */ 408 /* lock out transaction commit, but don't block on background push */
460 if (!down_write_trylock(&cil->xc_ctx_lock)) { 409 if (!down_write_trylock(&cil->xc_ctx_lock)) {
461 if (!push_now) 410 if (!push_seq)
462 goto out_free_ticket; 411 goto out_free_ticket;
463 down_write(&cil->xc_ctx_lock); 412 down_write(&cil->xc_ctx_lock);
464 } 413 }
@@ -469,7 +418,11 @@ xlog_cil_push(
469 goto out_skip; 418 goto out_skip;
470 419
471 /* check for spurious background flush */ 420 /* check for spurious background flush */
472 if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) 421 if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
422 goto out_skip;
423
424 /* check for a previously pushed sequence */
425 if (push_seq < cil->xc_ctx->sequence)
473 goto out_skip; 426 goto out_skip;
474 427
475 /* 428 /*
@@ -515,6 +468,13 @@ xlog_cil_push(
515 cil->xc_ctx = new_ctx; 468 cil->xc_ctx = new_ctx;
516 469
517 /* 470 /*
471 * mirror the new sequence into the cil structure so that we can do
472 * unlocked checks against the current sequence in log forces without
473 * risking deferencing a freed context pointer.
474 */
475 cil->xc_current_sequence = new_ctx->sequence;
476
477 /*
518 * The switch is now done, so we can drop the context lock and move out 478 * The switch is now done, so we can drop the context lock and move out
519 * of a shared context. We can't just go straight to the commit record, 479 * of a shared context. We can't just go straight to the commit record,
520 * though - we need to synchronise with previous and future commits so 480 * though - we need to synchronise with previous and future commits so
@@ -626,6 +586,102 @@ out_abort:
626} 586}
627 587
628/* 588/*
589 * Commit a transaction with the given vector to the Committed Item List.
590 *
591 * To do this, we need to format the item, pin it in memory if required and
592 * account for the space used by the transaction. Once we have done that we
593 * need to release the unused reservation for the transaction, attach the
594 * transaction to the checkpoint context so we carry the busy extents through
595 * to checkpoint completion, and then unlock all the items in the transaction.
596 *
597 * For more specific information about the order of operations in
598 * xfs_log_commit_cil() please refer to the comments in
599 * xfs_trans_commit_iclog().
600 *
601 * Called with the context lock already held in read mode to lock out
602 * background commit, returns without it held once background commits are
603 * allowed again.
604 */
605int
606xfs_log_commit_cil(
607 struct xfs_mount *mp,
608 struct xfs_trans *tp,
609 struct xfs_log_vec *log_vector,
610 xfs_lsn_t *commit_lsn,
611 int flags)
612{
613 struct log *log = mp->m_log;
614 int log_flags = 0;
615 int push = 0;
616
617 if (flags & XFS_TRANS_RELEASE_LOG_RES)
618 log_flags = XFS_LOG_REL_PERM_RESERV;
619
620 if (XLOG_FORCED_SHUTDOWN(log)) {
621 xlog_cil_free_logvec(log_vector);
622 return XFS_ERROR(EIO);
623 }
624
625 /*
626 * do all the hard work of formatting items (including memory
627 * allocation) outside the CIL context lock. This prevents stalling CIL
628 * pushes when we are low on memory and a transaction commit spends a
629 * lot of time in memory reclaim.
630 */
631 xlog_cil_format_items(log, log_vector);
632
633 /* lock out background commit */
634 down_read(&log->l_cilp->xc_ctx_lock);
635 xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
636
637 /* check we didn't blow the reservation */
638 if (tp->t_ticket->t_curr_res < 0)
639 xlog_print_tic_res(log->l_mp, tp->t_ticket);
640
641 /* attach the transaction to the CIL if it has any busy extents */
642 if (!list_empty(&tp->t_busy)) {
643 spin_lock(&log->l_cilp->xc_cil_lock);
644 list_splice_init(&tp->t_busy,
645 &log->l_cilp->xc_ctx->busy_extents);
646 spin_unlock(&log->l_cilp->xc_cil_lock);
647 }
648
649 tp->t_commit_lsn = *commit_lsn;
650 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
651 xfs_trans_unreserve_and_mod_sb(tp);
652
653 /*
654 * Once all the items of the transaction have been copied to the CIL,
655 * the items can be unlocked and freed.
656 *
657 * This needs to be done before we drop the CIL context lock because we
658 * have to update state in the log items and unlock them before they go
659 * to disk. If we don't, then the CIL checkpoint can race with us and
660 * we can run checkpoint completion before we've updated and unlocked
661 * the log items. This affects (at least) processing of stale buffers,
662 * inodes and EFIs.
663 */
664 xfs_trans_free_items(tp, *commit_lsn, 0);
665
666 /* check for background commit before unlock */
667 if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
668 push = 1;
669
670 up_read(&log->l_cilp->xc_ctx_lock);
671
672 /*
673 * We need to push CIL every so often so we don't cache more than we
674 * can fit in the log. The limit really is that a checkpoint can't be
675 * more than half the log (the current checkpoint is not allowed to
676 * overwrite the previous checkpoint), but commit latency and memory
677 * usage limit this to a smaller size in most cases.
678 */
679 if (push)
680 xlog_cil_push(log, 0);
681 return 0;
682}
683
684/*
629 * Conditionally push the CIL based on the sequence passed in. 685 * Conditionally push the CIL based on the sequence passed in.
630 * 686 *
631 * We only need to push if we haven't already pushed the sequence 687 * We only need to push if we haven't already pushed the sequence
@@ -639,39 +695,34 @@ out_abort:
639 * commit lsn is there. It'll be empty, so this is broken for now. 695 * commit lsn is there. It'll be empty, so this is broken for now.
640 */ 696 */
641xfs_lsn_t 697xfs_lsn_t
642xlog_cil_push_lsn( 698xlog_cil_force_lsn(
643 struct log *log, 699 struct log *log,
644 xfs_lsn_t push_seq) 700 xfs_lsn_t sequence)
645{ 701{
646 struct xfs_cil *cil = log->l_cilp; 702 struct xfs_cil *cil = log->l_cilp;
647 struct xfs_cil_ctx *ctx; 703 struct xfs_cil_ctx *ctx;
648 xfs_lsn_t commit_lsn = NULLCOMMITLSN; 704 xfs_lsn_t commit_lsn = NULLCOMMITLSN;
649 705
650restart: 706 ASSERT(sequence <= cil->xc_current_sequence);
651 down_write(&cil->xc_ctx_lock); 707
652 ASSERT(push_seq <= cil->xc_ctx->sequence); 708 /*
653 709 * check to see if we need to force out the current context.
654 /* check to see if we need to force out the current context */ 710 * xlog_cil_push() handles racing pushes for the same sequence,
655 if (push_seq == cil->xc_ctx->sequence) { 711 * so no need to deal with it here.
656 up_write(&cil->xc_ctx_lock); 712 */
657 xlog_cil_push(log, 1); 713 if (sequence == cil->xc_current_sequence)
658 goto restart; 714 xlog_cil_push(log, sequence);
659 }
660 715
661 /* 716 /*
662 * See if we can find a previous sequence still committing. 717 * See if we can find a previous sequence still committing.
663 * We can drop the flush lock as soon as we have the cil lock
664 * because we are now only comparing contexts protected by
665 * the cil lock.
666 *
667 * We need to wait for all previous sequence commits to complete 718 * We need to wait for all previous sequence commits to complete
668 * before allowing the force of push_seq to go ahead. Hence block 719 * before allowing the force of push_seq to go ahead. Hence block
669 * on commits for those as well. 720 * on commits for those as well.
670 */ 721 */
722restart:
671 spin_lock(&cil->xc_cil_lock); 723 spin_lock(&cil->xc_cil_lock);
672 up_write(&cil->xc_ctx_lock);
673 list_for_each_entry(ctx, &cil->xc_committing, committing) { 724 list_for_each_entry(ctx, &cil->xc_committing, committing) {
674 if (ctx->sequence > push_seq) 725 if (ctx->sequence > sequence)
675 continue; 726 continue;
676 if (!ctx->commit_lsn) { 727 if (!ctx->commit_lsn) {
677 /* 728 /*
@@ -681,7 +732,7 @@ restart:
681 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0); 732 sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
682 goto restart; 733 goto restart;
683 } 734 }
684 if (ctx->sequence != push_seq) 735 if (ctx->sequence != sequence)
685 continue; 736 continue;
686 /* found it! */ 737 /* found it! */
687 commit_lsn = ctx->commit_lsn; 738 commit_lsn = ctx->commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965c..ced52b98b322 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -422,6 +422,7 @@ struct xfs_cil {
422 struct rw_semaphore xc_ctx_lock; 422 struct rw_semaphore xc_ctx_lock;
423 struct list_head xc_committing; 423 struct list_head xc_committing;
424 sv_t xc_commit_wait; 424 sv_t xc_commit_wait;
425 xfs_lsn_t xc_current_sequence;
425}; 426};
426 427
427/* 428/*
@@ -562,8 +563,16 @@ int xlog_cil_init(struct log *log);
562void xlog_cil_init_post_recovery(struct log *log); 563void xlog_cil_init_post_recovery(struct log *log);
563void xlog_cil_destroy(struct log *log); 564void xlog_cil_destroy(struct log *log);
564 565
565int xlog_cil_push(struct log *log, int push_now); 566/*
566xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence); 567 * CIL force routines
568 */
569xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
570
571static inline void
572xlog_cil_force(struct log *log)
573{
574 xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
575}
567 576
568/* 577/*
569 * Unmount record type is used as a pseudo transaction type for the ticket. 578 * Unmount record type is used as a pseudo transaction type for the ticket.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdca7416c754..1c47edaea0d2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1167,7 +1167,7 @@ xfs_trans_del_item(
1167 * Unlock all of the items of a transaction and free all the descriptors 1167 * Unlock all of the items of a transaction and free all the descriptors
1168 * of that transaction. 1168 * of that transaction.
1169 */ 1169 */
1170STATIC void 1170void
1171xfs_trans_free_items( 1171xfs_trans_free_items(
1172 struct xfs_trans *tp, 1172 struct xfs_trans *tp,
1173 xfs_lsn_t commit_lsn, 1173 xfs_lsn_t commit_lsn,
@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil(
1653 return error; 1653 return error;
1654 1654
1655 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1655 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1656
1657 /* xfs_trans_free_items() unlocks them first */
1658 xfs_trans_free_items(tp, *commit_lsn, 0);
1659 xfs_trans_free(tp); 1656 xfs_trans_free(tp);
1660 return 0; 1657 return 0;
1661} 1658}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index e2d93d8ead7b..62da86c90de5 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -25,7 +25,8 @@ struct xfs_trans;
25 25
26void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); 26void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
27void xfs_trans_del_item(struct xfs_log_item *); 27void xfs_trans_del_item(struct xfs_log_item *);
28 28void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
29 int flags);
29void xfs_trans_item_committed(struct xfs_log_item *lip, 30void xfs_trans_item_committed(struct xfs_log_item *lip,
30 xfs_lsn_t commit_lsn, int aborted); 31 xfs_lsn_t commit_lsn, int aborted);
31void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 32void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 66d585c6917c..4c7c7bfb2b2f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2299,15 +2299,22 @@ xfs_alloc_file_space(
2299 e = allocatesize_fsb; 2299 e = allocatesize_fsb;
2300 } 2300 }
2301 2301
2302 /*
2303 * The transaction reservation is limited to a 32-bit block
2304 * count, hence we need to limit the number of blocks we are
2305 * trying to reserve to avoid an overflow. We can't allocate
2306 * more than @nimaps extents, and an extent is limited on disk
2307 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
2308 */
2309 resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
2302 if (unlikely(rt)) { 2310 if (unlikely(rt)) {
2303 resrtextents = qblocks = (uint)(e - s); 2311 resrtextents = qblocks = resblks;
2304 resrtextents /= mp->m_sb.sb_rextsize; 2312 resrtextents /= mp->m_sb.sb_rextsize;
2305 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 2313 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
2306 quota_flag = XFS_QMOPT_RES_RTBLKS; 2314 quota_flag = XFS_QMOPT_RES_RTBLKS;
2307 } else { 2315 } else {
2308 resrtextents = 0; 2316 resrtextents = 0;
2309 resblks = qblocks = \ 2317 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
2310 XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
2311 quota_flag = XFS_QMOPT_RES_REGBLKS; 2318 quota_flag = XFS_QMOPT_RES_REGBLKS;
2312 } 2319 }
2313 2320