aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2011-01-10 14:48:02 -0500
committerTrond Myklebust <Trond.Myklebust@netapp.com>2011-01-10 14:48:02 -0500
commit68c404b18f6fba404b2753622d0459c68ee128ae (patch)
treec1ec0bb12f19d91071b461cc2831d9d3dd4c74f3 /fs
parentd035c36c58dd9183ad6aa7875dea89893faedb55 (diff)
parent6650239a4b01077e80d5a4468562756d77afaa59 (diff)
Merge branch 'bugfixes' into nfs-for-2.6.38
Conflicts: fs/nfs/nfs2xdr.c fs/nfs/nfs3xdr.c fs/nfs/nfs4xdr.c
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/ceph/dir.c3
-rw-r--r--fs/ceph/file.c39
-rw-r--r--fs/ext4/resize.c5
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/readwrite.c3
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfs/dir.c44
-rw-r--r--fs/nfs/nfs2xdr.c5
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs4xdr.c6
-rw-r--r--fs/notify/fanotify/fanotify.c6
-rw-r--r--fs/notify/fanotify/fanotify_user.c81
-rw-r--r--fs/notify/inotify/inotify_user.c1
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/aops.h23
-rw-r--r--fs/ocfs2/cluster/masklog.c3
-rw-r--r--fs/ocfs2/cluster/masklog.h15
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/file.c15
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
22 files changed, 197 insertions, 117 deletions
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 6f0444473594..659f532d26a0 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
166static struct dentry *btrfs_get_parent(struct dentry *child) 166static struct dentry *btrfs_get_parent(struct dentry *child)
167{ 167{
168 struct inode *dir = child->d_inode; 168 struct inode *dir = child->d_inode;
169 static struct dentry *dentry; 169 struct dentry *dentry;
170 struct btrfs_root *root = BTRFS_I(dir)->root; 170 struct btrfs_root *root = BTRFS_I(dir)->root;
171 struct btrfs_path *path; 171 struct btrfs_path *path;
172 struct extent_buffer *leaf; 172 struct extent_buffer *leaf;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 158c700fdca5..d902948a90d8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry)
40 if (dentry->d_fsdata) 40 if (dentry->d_fsdata)
41 return 0; 41 return 0;
42 42
43 if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
44 dentry->d_op = &ceph_dentry_ops; 45 dentry->d_op = &ceph_dentry_ops;
45 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
46 dentry->d_op = &ceph_snapdir_dentry_ops; 47 dentry->d_op = &ceph_snapdir_dentry_ops;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8d79b8912e31..7d0e4a82d898 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -282,7 +282,8 @@ int ceph_release(struct inode *inode, struct file *file)
282static int striped_read(struct inode *inode, 282static int striped_read(struct inode *inode,
283 u64 off, u64 len, 283 u64 off, u64 len,
284 struct page **pages, int num_pages, 284 struct page **pages, int num_pages,
285 int *checkeof, bool align_to_pages) 285 int *checkeof, bool align_to_pages,
286 unsigned long buf_align)
286{ 287{
287 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 288 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
288 struct ceph_inode_info *ci = ceph_inode(inode); 289 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
307 308
308more: 309more:
309 if (align_to_pages) 310 if (align_to_pages)
310 page_align = (pos - io_align) & ~PAGE_MASK; 311 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
311 else 312 else
312 page_align = pos & ~PAGE_MASK; 313 page_align = pos & ~PAGE_MASK;
313 this_len = left; 314 this_len = left;
@@ -376,16 +377,18 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
376 struct inode *inode = file->f_dentry->d_inode; 377 struct inode *inode = file->f_dentry->d_inode;
377 struct page **pages; 378 struct page **pages;
378 u64 off = *poff; 379 u64 off = *poff;
379 int num_pages = calc_pages_for(off, len); 380 int num_pages, ret;
380 int ret;
381 381
382 dout("sync_read on file %p %llu~%u %s\n", file, off, len, 382 dout("sync_read on file %p %llu~%u %s\n", file, off, len,
383 (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); 383 (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
384 384
385 if (file->f_flags & O_DIRECT) 385 if (file->f_flags & O_DIRECT) {
386 pages = ceph_get_direct_page_vector(data, num_pages); 386 num_pages = calc_pages_for((unsigned long)data, len);
387 else 387 pages = ceph_get_direct_page_vector(data, num_pages, true);
388 } else {
389 num_pages = calc_pages_for(off, len);
388 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); 390 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
391 }
389 if (IS_ERR(pages)) 392 if (IS_ERR(pages))
390 return PTR_ERR(pages); 393 return PTR_ERR(pages);
391 394
@@ -400,7 +403,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
400 goto done; 403 goto done;
401 404
402 ret = striped_read(inode, off, len, pages, num_pages, checkeof, 405 ret = striped_read(inode, off, len, pages, num_pages, checkeof,
403 file->f_flags & O_DIRECT); 406 file->f_flags & O_DIRECT,
407 (unsigned long)data & ~PAGE_MASK);
404 408
405 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) 409 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
406 ret = ceph_copy_page_vector_to_user(pages, data, off, ret); 410 ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -409,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
409 413
410done: 414done:
411 if (file->f_flags & O_DIRECT) 415 if (file->f_flags & O_DIRECT)
412 ceph_put_page_vector(pages, num_pages); 416 ceph_put_page_vector(pages, num_pages, true);
413 else 417 else
414 ceph_release_page_vector(pages, num_pages); 418 ceph_release_page_vector(pages, num_pages);
415 dout("sync_read result %d\n", ret); 419 dout("sync_read result %d\n", ret);
@@ -456,6 +460,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
456 int do_sync = 0; 460 int do_sync = 0;
457 int check_caps = 0; 461 int check_caps = 0;
458 int page_align, io_align; 462 int page_align, io_align;
463 unsigned long buf_align;
459 int ret; 464 int ret;
460 struct timespec mtime = CURRENT_TIME; 465 struct timespec mtime = CURRENT_TIME;
461 466
@@ -471,6 +476,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
471 pos = *offset; 476 pos = *offset;
472 477
473 io_align = pos & ~PAGE_MASK; 478 io_align = pos & ~PAGE_MASK;
479 buf_align = (unsigned long)data & ~PAGE_MASK;
474 480
475 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); 481 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
476 if (ret < 0) 482 if (ret < 0)
@@ -496,12 +502,15 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
496 */ 502 */
497more: 503more:
498 len = left; 504 len = left;
499 if (file->f_flags & O_DIRECT) 505 if (file->f_flags & O_DIRECT) {
500 /* write from beginning of first page, regardless of 506 /* write from beginning of first page, regardless of
501 io alignment */ 507 io alignment */
502 page_align = (pos - io_align) & ~PAGE_MASK; 508 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
503 else 509 num_pages = calc_pages_for((unsigned long)data, len);
510 } else {
504 page_align = pos & ~PAGE_MASK; 511 page_align = pos & ~PAGE_MASK;
512 num_pages = calc_pages_for(pos, len);
513 }
505 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 514 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
506 ceph_vino(inode), pos, &len, 515 ceph_vino(inode), pos, &len,
507 CEPH_OSD_OP_WRITE, flags, 516 CEPH_OSD_OP_WRITE, flags,
@@ -512,10 +521,8 @@ more:
512 if (!req) 521 if (!req)
513 return -ENOMEM; 522 return -ENOMEM;
514 523
515 num_pages = calc_pages_for(pos, len);
516
517 if (file->f_flags & O_DIRECT) { 524 if (file->f_flags & O_DIRECT) {
518 pages = ceph_get_direct_page_vector(data, num_pages); 525 pages = ceph_get_direct_page_vector(data, num_pages, false);
519 if (IS_ERR(pages)) { 526 if (IS_ERR(pages)) {
520 ret = PTR_ERR(pages); 527 ret = PTR_ERR(pages);
521 goto out; 528 goto out;
@@ -565,7 +572,7 @@ more:
565 } 572 }
566 573
567 if (file->f_flags & O_DIRECT) 574 if (file->f_flags & O_DIRECT)
568 ceph_put_page_vector(pages, num_pages); 575 ceph_put_page_vector(pages, num_pages, false);
569 else if (file->f_flags & O_SYNC) 576 else if (file->f_flags & O_SYNC)
570 ceph_release_page_vector(pages, num_pages); 577 ceph_release_page_vector(pages, num_pages);
571 578
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index dc963929de65..981c8477adab 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -232,6 +232,8 @@ static int setup_new_group_blocks(struct super_block *sb,
232 GFP_NOFS); 232 GFP_NOFS);
233 if (err) 233 if (err)
234 goto exit_bh; 234 goto exit_bh;
235 for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
236 ext4_set_bit(bit, bh->b_data);
235 237
236 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 238 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
237 input->block_bitmap - start); 239 input->block_bitmap - start);
@@ -247,6 +249,9 @@ static int setup_new_group_blocks(struct super_block *sb,
247 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 249 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
248 if (err) 250 if (err)
249 goto exit_bh; 251 goto exit_bh;
252 for (i = 0, bit = input->inode_table - start;
253 i < sbi->s_itb_per_group; i++, bit++)
254 ext4_set_bit(bit, bh->b_data);
250 255
251 if ((err = extend_or_restart_transaction(handle, 2, bh))) 256 if ((err = extend_or_restart_transaction(handle, 2, bh)))
252 goto exit_bh; 257 goto exit_bh;
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index f46ee8b0e135..9da29706f91c 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
828 super->s_journal_seg[i] = segno; 828 super->s_journal_seg[i] = segno;
829 super->s_journal_ec[i] = ec; 829 super->s_journal_ec[i] = ec;
830 logfs_set_segment_reserved(sb, segno); 830 logfs_set_segment_reserved(sb, segno);
831 err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); 831 err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
832 BUG_ON(err); /* mempool should prevent this */ 832 BUG_ON(err); /* mempool should prevent this */
833 err = logfs_erase_segment(sb, segno, 1); 833 err = logfs_erase_segment(sb, segno, 1);
834 BUG_ON(err); /* FIXME: remount-ro would be nicer */ 834 BUG_ON(err); /* FIXME: remount-ro would be nicer */
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf0e188..ee99a9f5dfd3 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
1994 1994
1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */ 1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */
1996 err = logfs_write_buf(master_inode, page, 0); 1996 err = logfs_write_buf(master_inode, page, 0);
1997 if (err)
1998 move_page_to_inode(inode, page);
1999
1997 logfs_put_write_page(page); 2000 logfs_put_write_page(page);
1998 return err; 2001 return err;
1999} 2002}
diff --git a/fs/namei.c b/fs/namei.c
index 5362af9b7372..4ff7ca530533 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1748,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
1748 if (!(open_flag & O_CREAT)) 1748 if (!(open_flag & O_CREAT))
1749 mode = 0; 1749 mode = 0;
1750 1750
1751 /* Must never be set by userspace */
1752 open_flag &= ~FMODE_NONOTIFY;
1753
1751 /* 1754 /*
1752 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1755 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1753 * check for O_DSYNC if the need any syncing at all we enforce it's 1756 * check for O_DSYNC if the need any syncing at all we enforce it's
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 65d5cb4f70b1..16ec096f6b24 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,7 +33,6 @@
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/vmalloc.h>
37#include <linux/kmemleak.h> 36#include <linux/kmemleak.h>
38#include <linux/xattr.h> 37#include <linux/xattr.h>
39 38
@@ -461,25 +460,26 @@ out:
461/* Perform conversion from xdr to cache array */ 460/* Perform conversion from xdr to cache array */
462static 461static
463int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, 462int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
464 void *xdr_page, struct page *page, unsigned int buflen) 463 struct page **xdr_pages, struct page *page, unsigned int buflen)
465{ 464{
466 struct xdr_stream stream; 465 struct xdr_stream stream;
467 struct xdr_buf buf; 466 struct xdr_buf buf = {
468 __be32 *ptr = xdr_page; 467 .pages = xdr_pages,
468 .page_len = buflen,
469 .buflen = buflen,
470 .len = buflen,
471 };
472 struct page *scratch;
469 struct nfs_cache_array *array; 473 struct nfs_cache_array *array;
470 unsigned int count = 0; 474 unsigned int count = 0;
471 int status; 475 int status;
472 476
473 buf.head->iov_base = xdr_page; 477 scratch = alloc_page(GFP_KERNEL);
474 buf.head->iov_len = buflen; 478 if (scratch == NULL)
475 buf.tail->iov_len = 0; 479 return -ENOMEM;
476 buf.page_base = 0;
477 buf.page_len = 0;
478 buf.buflen = buf.head->iov_len;
479 buf.len = buf.head->iov_len;
480
481 xdr_init_decode(&stream, &buf, ptr);
482 480
481 xdr_init_decode(&stream, &buf, NULL);
482 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
483 483
484 do { 484 do {
485 status = xdr_decode(desc, entry, &stream); 485 status = xdr_decode(desc, entry, &stream);
@@ -508,6 +508,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
508 } else 508 } else
509 status = PTR_ERR(array); 509 status = PTR_ERR(array);
510 } 510 }
511
512 put_page(scratch);
511 return status; 513 return status;
512} 514}
513 515
@@ -523,7 +525,6 @@ static
523void nfs_readdir_free_large_page(void *ptr, struct page **pages, 525void nfs_readdir_free_large_page(void *ptr, struct page **pages,
524 unsigned int npages) 526 unsigned int npages)
525{ 527{
526 vm_unmap_ram(ptr, npages);
527 nfs_readdir_free_pagearray(pages, npages); 528 nfs_readdir_free_pagearray(pages, npages);
528} 529}
529 530
@@ -532,9 +533,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
532 * to nfs_readdir_free_large_page 533 * to nfs_readdir_free_large_page
533 */ 534 */
534static 535static
535void *nfs_readdir_large_page(struct page **pages, unsigned int npages) 536int nfs_readdir_large_page(struct page **pages, unsigned int npages)
536{ 537{
537 void *ptr;
538 unsigned int i; 538 unsigned int i;
539 539
540 for (i = 0; i < npages; i++) { 540 for (i = 0; i < npages; i++) {
@@ -543,13 +543,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
543 goto out_freepages; 543 goto out_freepages;
544 pages[i] = page; 544 pages[i] = page;
545 } 545 }
546 return 0;
546 547
547 ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
548 if (!IS_ERR_OR_NULL(ptr))
549 return ptr;
550out_freepages: 548out_freepages:
551 nfs_readdir_free_pagearray(pages, i); 549 nfs_readdir_free_pagearray(pages, i);
552 return NULL; 550 return -ENOMEM;
553} 551}
554 552
555static 553static
@@ -580,8 +578,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
580 memset(array, 0, sizeof(struct nfs_cache_array)); 578 memset(array, 0, sizeof(struct nfs_cache_array));
581 array->eof_index = -1; 579 array->eof_index = -1;
582 580
583 pages_ptr = nfs_readdir_large_page(pages, array_size); 581 status = nfs_readdir_large_page(pages, array_size);
584 if (!pages_ptr) 582 if (status < 0)
585 goto out_release_array; 583 goto out_release_array;
586 do { 584 do {
587 unsigned int pglen; 585 unsigned int pglen;
@@ -590,7 +588,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
590 if (status < 0) 588 if (status < 0)
591 break; 589 break;
592 pglen = status; 590 pglen = status;
593 status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen); 591 status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
594 if (status < 0) { 592 if (status < 0) {
595 if (status == -ENOSPC) 593 if (status == -ENOSPC)
596 status = 0; 594 status = 0;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 51f1cfa04d27..792cb13a4304 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -943,11 +943,6 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
943 943
944 entry->d_type = DT_UNKNOWN; 944 entry->d_type = DT_UNKNOWN;
945 945
946 /* Peek at the next entry to see if we're at EOD */
947 p = xdr_inline_peek(xdr, 4 + 4);
948 entry->eof = 0;
949 if (p != NULL)
950 entry->eof = (p[0] == xdr_zero) && (p[1] != xdr_zero);
951 return 0; 946 return 0;
952 947
953out_overflow: 948out_overflow:
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index df30a26cc4fa..01c5e8b1941d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1989,11 +1989,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
1989 zero_nfs_fh3(entry->fh); 1989 zero_nfs_fh3(entry->fh);
1990 } 1990 }
1991 1991
1992 /* Peek at the next entry to see if we're at EOD */
1993 p = xdr_inline_peek(xdr, 4 + 4);
1994 entry->eof = 0;
1995 if (p != NULL)
1996 entry->eof = (p[0] == xdr_zero) && (p[1] != xdr_zero);
1997 return 0; 1992 return 0;
1998 1993
1999out_overflow: 1994out_overflow:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8e496887ec61..2ab8e5cb8f59 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6135,12 +6135,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6135 if (verify_attr_len(xdr, p, len) < 0) 6135 if (verify_attr_len(xdr, p, len) < 0)
6136 goto out_overflow; 6136 goto out_overflow;
6137 6137
6138 p = xdr_inline_peek(xdr, 8);
6139 if (p != NULL)
6140 entry->eof = !p[0] && p[1];
6141 else
6142 entry->eof = 0;
6143
6144 return 0; 6138 return 0;
6145 6139
6146out_overflow: 6140out_overflow:
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b04f88eed09e..f35794b97e8e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
92 92
93 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 93 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
94 94
95 wait_event(group->fanotify_data.access_waitq, event->response); 95 wait_event(group->fanotify_data.access_waitq, event->response ||
96 atomic_read(&group->fanotify_data.bypass_perm));
97
98 if (!event->response) /* bypass_perm set */
99 return 0;
96 100
97 /* userspace responded, convert to something usable */ 101 /* userspace responded, convert to something usable */
98 spin_lock(&event->lock); 102 spin_lock(&event->lock);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 063224812b7e..8b61220cffc5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -106,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
106 return client_fd; 106 return client_fd;
107} 107}
108 108
109static ssize_t fill_event_metadata(struct fsnotify_group *group, 109static int fill_event_metadata(struct fsnotify_group *group,
110 struct fanotify_event_metadata *metadata, 110 struct fanotify_event_metadata *metadata,
111 struct fsnotify_event *event) 111 struct fsnotify_event *event)
112{ 112{
113 int ret = 0;
114
113 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 115 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
114 group, metadata, event); 116 group, metadata, event);
115 117
116 metadata->event_len = FAN_EVENT_METADATA_LEN; 118 metadata->event_len = FAN_EVENT_METADATA_LEN;
119 metadata->metadata_len = FAN_EVENT_METADATA_LEN;
117 metadata->vers = FANOTIFY_METADATA_VERSION; 120 metadata->vers = FANOTIFY_METADATA_VERSION;
118 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; 121 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
119 metadata->pid = pid_vnr(event->tgid); 122 metadata->pid = pid_vnr(event->tgid);
120 metadata->fd = create_fd(group, event); 123 if (unlikely(event->mask & FAN_Q_OVERFLOW))
124 metadata->fd = FAN_NOFD;
125 else {
126 metadata->fd = create_fd(group, event);
127 if (metadata->fd < 0)
128 ret = metadata->fd;
129 }
121 130
122 return metadata->fd; 131 return ret;
123} 132}
124 133
125#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 134#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -200,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
200 209
201 mutex_lock(&group->fanotify_data.access_mutex); 210 mutex_lock(&group->fanotify_data.access_mutex);
202 211
203 if (group->fanotify_data.bypass_perm) { 212 if (atomic_read(&group->fanotify_data.bypass_perm)) {
204 mutex_unlock(&group->fanotify_data.access_mutex); 213 mutex_unlock(&group->fanotify_data.access_mutex);
205 kmem_cache_free(fanotify_response_event_cache, re); 214 kmem_cache_free(fanotify_response_event_cache, re);
206 event->response = FAN_ALLOW; 215 event->response = FAN_ALLOW;
@@ -257,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
257 266
258 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 267 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
259 268
260 fd = fill_event_metadata(group, &fanotify_event_metadata, event); 269 ret = fill_event_metadata(group, &fanotify_event_metadata, event);
261 if (fd < 0) 270 if (ret < 0)
262 return fd; 271 goto out;
263 272
273 fd = fanotify_event_metadata.fd;
264 ret = prepare_for_access_response(group, event, fd); 274 ret = prepare_for_access_response(group, event, fd);
265 if (ret) 275 if (ret)
266 goto out_close_fd; 276 goto out_close_fd;
267 277
268 ret = -EFAULT; 278 ret = -EFAULT;
269 if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) 279 if (copy_to_user(buf, &fanotify_event_metadata,
280 fanotify_event_metadata.event_len))
270 goto out_kill_access_response; 281 goto out_kill_access_response;
271 282
272 return FAN_EVENT_METADATA_LEN; 283 return fanotify_event_metadata.event_len;
273 284
274out_kill_access_response: 285out_kill_access_response:
275 remove_access_response(group, event, fd); 286 remove_access_response(group, event, fd);
276out_close_fd: 287out_close_fd:
277 sys_close(fd); 288 if (fd != FAN_NOFD)
289 sys_close(fd);
290out:
291#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
292 if (event->mask & FAN_ALL_PERM_EVENTS) {
293 event->response = FAN_DENY;
294 wake_up(&group->fanotify_data.access_waitq);
295 }
296#endif
278 return ret; 297 return ret;
279} 298}
280 299
@@ -382,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
382 401
383 mutex_lock(&group->fanotify_data.access_mutex); 402 mutex_lock(&group->fanotify_data.access_mutex);
384 403
385 group->fanotify_data.bypass_perm = true; 404 atomic_inc(&group->fanotify_data.bypass_perm);
386 405
387 list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { 406 list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
388 pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, 407 pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -586,11 +605,10 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
586{ 605{
587 struct fsnotify_mark *fsn_mark; 606 struct fsnotify_mark *fsn_mark;
588 __u32 added; 607 __u32 added;
608 int ret = 0;
589 609
590 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); 610 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
591 if (!fsn_mark) { 611 if (!fsn_mark) {
592 int ret;
593
594 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) 612 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
595 return -ENOSPC; 613 return -ENOSPC;
596 614
@@ -600,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
600 618
601 fsnotify_init_mark(fsn_mark, fanotify_free_mark); 619 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
602 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); 620 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
603 if (ret) { 621 if (ret)
604 fanotify_free_mark(fsn_mark); 622 goto err;
605 return ret;
606 }
607 } 623 }
608 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 624 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
609 fsnotify_put_mark(fsn_mark); 625
610 if (added & ~mnt->mnt_fsnotify_mask) 626 if (added & ~mnt->mnt_fsnotify_mask)
611 fsnotify_recalc_vfsmount_mask(mnt); 627 fsnotify_recalc_vfsmount_mask(mnt);
612 628err:
613 return 0; 629 fsnotify_put_mark(fsn_mark);
630 return ret;
614} 631}
615 632
616static int fanotify_add_inode_mark(struct fsnotify_group *group, 633static int fanotify_add_inode_mark(struct fsnotify_group *group,
@@ -619,6 +636,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
619{ 636{
620 struct fsnotify_mark *fsn_mark; 637 struct fsnotify_mark *fsn_mark;
621 __u32 added; 638 __u32 added;
639 int ret = 0;
622 640
623 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); 641 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
624 642
@@ -634,8 +652,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
634 652
635 fsn_mark = fsnotify_find_inode_mark(group, inode); 653 fsn_mark = fsnotify_find_inode_mark(group, inode);
636 if (!fsn_mark) { 654 if (!fsn_mark) {
637 int ret;
638
639 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) 655 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
640 return -ENOSPC; 656 return -ENOSPC;
641 657
@@ -645,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
645 661
646 fsnotify_init_mark(fsn_mark, fanotify_free_mark); 662 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
647 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); 663 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
648 if (ret) { 664 if (ret)
649 fanotify_free_mark(fsn_mark); 665 goto err;
650 return ret;
651 }
652 } 666 }
653 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 667 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
654 fsnotify_put_mark(fsn_mark); 668
655 if (added & ~inode->i_fsnotify_mask) 669 if (added & ~inode->i_fsnotify_mask)
656 fsnotify_recalc_inode_mask(inode); 670 fsnotify_recalc_inode_mask(inode);
657 return 0; 671err:
672 fsnotify_put_mark(fsn_mark);
673 return ret;
658} 674}
659 675
660/* fanotify syscalls */ 676/* fanotify syscalls */
@@ -687,8 +703,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
687 703
688 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ 704 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
689 group = fsnotify_alloc_group(&fanotify_fsnotify_ops); 705 group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
690 if (IS_ERR(group)) 706 if (IS_ERR(group)) {
707 free_uid(user);
691 return PTR_ERR(group); 708 return PTR_ERR(group);
709 }
692 710
693 group->fanotify_data.user = user; 711 group->fanotify_data.user = user;
694 atomic_inc(&user->fanotify_listeners); 712 atomic_inc(&user->fanotify_listeners);
@@ -698,6 +716,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
698 mutex_init(&group->fanotify_data.access_mutex); 716 mutex_init(&group->fanotify_data.access_mutex);
699 init_waitqueue_head(&group->fanotify_data.access_waitq); 717 init_waitqueue_head(&group->fanotify_data.access_waitq);
700 INIT_LIST_HEAD(&group->fanotify_data.access_list); 718 INIT_LIST_HEAD(&group->fanotify_data.access_list);
719 atomic_set(&group->fanotify_data.bypass_perm, 0);
701#endif 720#endif
702 switch (flags & FAN_ALL_CLASS_BITS) { 721 switch (flags & FAN_ALL_CLASS_BITS) {
703 case FAN_CLASS_NOTIF: 722 case FAN_CLASS_NOTIF:
@@ -764,8 +783,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
764 if (flags & ~FAN_ALL_MARK_FLAGS) 783 if (flags & ~FAN_ALL_MARK_FLAGS)
765 return -EINVAL; 784 return -EINVAL;
766 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { 785 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
767 case FAN_MARK_ADD: 786 case FAN_MARK_ADD: /* fallthrough */
768 case FAN_MARK_REMOVE: 787 case FAN_MARK_REMOVE:
788 if (!mask)
789 return -EINVAL;
769 case FAN_MARK_FLUSH: 790 case FAN_MARK_FLUSH:
770 break; 791 break;
771 default: 792 default:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 444c305a468c..4cd5d5d78f9f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
752 if (ret >= 0) 752 if (ret >= 0)
753 return ret; 753 return ret;
754 754
755 fsnotify_put_group(group);
755 atomic_dec(&user->inotify_devs); 756 atomic_dec(&user->inotify_devs);
756out_free_uid: 757out_free_uid:
757 free_uid(user); 758 free_uid(user);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962cb3b73..0d7c5540ad66 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
573 /* this io's submitter should not have unlocked this before we could */ 573 /* this io's submitter should not have unlocked this before we could */
574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
575 575
576 if (ocfs2_iocb_is_sem_locked(iocb)) {
577 up_read(&inode->i_alloc_sem);
578 ocfs2_iocb_clear_sem_locked(iocb);
579 }
580
576 ocfs2_iocb_clear_rw_locked(iocb); 581 ocfs2_iocb_clear_rw_locked(iocb);
577 582
578 level = ocfs2_iocb_rw_locked_level(iocb); 583 level = ocfs2_iocb_rw_locked_level(iocb);
579 if (!level)
580 up_read(&inode->i_alloc_sem);
581 ocfs2_rw_unlock(inode, level); 584 ocfs2_rw_unlock(inode, level);
582 585
583 if (is_async) 586 if (is_async)
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfda691a..eceb456037c1 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
68 else 68 else
69 clear_bit(1, (unsigned long *)&iocb->private); 69 clear_bit(1, (unsigned long *)&iocb->private);
70} 70}
71
72/*
73 * Using a named enum representing lock types in terms of #N bit stored in
74 * iocb->private, which is going to be used for communication bewteen
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */
77enum ocfs2_iocb_lock_bits {
78 OCFS2_IOCB_RW_LOCK = 0,
79 OCFS2_IOCB_RW_LOCK_LEVEL,
80 OCFS2_IOCB_SEM,
81 OCFS2_IOCB_NUM_LOCKS
82};
83
71#define ocfs2_iocb_clear_rw_locked(iocb) \ 84#define ocfs2_iocb_clear_rw_locked(iocb) \
72 clear_bit(0, (unsigned long *)&iocb->private) 85 clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
73#define ocfs2_iocb_rw_locked_level(iocb) \ 86#define ocfs2_iocb_rw_locked_level(iocb) \
74 test_bit(1, (unsigned long *)&iocb->private) 87 test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
88#define ocfs2_iocb_set_sem_locked(iocb) \
89 set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
90#define ocfs2_iocb_clear_sem_locked(iocb) \
91 clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
92#define ocfs2_iocb_is_sem_locked(iocb) \
93 test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
75#endif /* OCFS2_FILE_H */ 94#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index c7fba396392d..6c61771469af 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
113 define_mask(QUOTA), 113 define_mask(QUOTA),
114 define_mask(REFCOUNT), 114 define_mask(REFCOUNT),
115 define_mask(BASTS), 115 define_mask(BASTS),
116 define_mask(RESERVATIONS),
117 define_mask(CLUSTER),
116 define_mask(ERROR), 118 define_mask(ERROR),
117 define_mask(NOTICE), 119 define_mask(NOTICE),
118 define_mask(KTHREAD), 120 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
120}; 121};
121 122
122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 123static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f56c94..34d6544357d9 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -81,7 +81,7 @@
81#include <linux/sched.h> 81#include <linux/sched.h>
82 82
83/* bits that are frequently given and infrequently matched in the low word */ 83/* bits that are frequently given and infrequently matched in the low word */
84/* NOTE: If you add a flag, you need to also update mlog.c! */ 84/* NOTE: If you add a flag, you need to also update masklog.c! */
85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */ 85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */
86#define ML_EXIT 0x0000000000000002ULL /* func call exit */ 86#define ML_EXIT 0x0000000000000002ULL /* func call exit */
87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ 87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */
@@ -114,13 +114,14 @@
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ 115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ 116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
117#define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ 117#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
118#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
119#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
120
118/* bits that are infrequently given and frequently matched in the high word */ 121/* bits that are infrequently given and frequently matched in the high word */
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 122#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 123#define ML_NOTICE 0x2000000000000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 124#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
123#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
124 125
125#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 126#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
126#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 127#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c49f6de0e7ab..d417b3f9b0c7 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2461 2461
2462 di->i_dx_root = cpu_to_le64(dr_blkno); 2462 di->i_dx_root = cpu_to_le64(dr_blkno);
2463 2463
2464 spin_lock(&OCFS2_I(dir)->ip_lock);
2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2465 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2466 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2467 spin_unlock(&OCFS2_I(dir)->ip_lock);
2466 2468
2467 ocfs2_journal_dirty(handle, di_bh); 2469 ocfs2_journal_dirty(handle, di_bh);
2468 2470
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4466 goto out_commit; 4468 goto out_commit;
4467 } 4469 }
4468 4470
4471 spin_lock(&OCFS2_I(dir)->ip_lock);
4469 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; 4472 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
4470 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 4473 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
4474 spin_unlock(&OCFS2_I(dir)->ip_lock);
4471 di->i_dx_root = cpu_to_le64(0ULL); 4475 di->i_dx_root = cpu_to_le64(0ULL);
4472 4476
4473 ocfs2_journal_dirty(handle, di_bh); 4477 ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f564b0e5f80d..59f0f6bdfc62 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2346 */ 2346 */
2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, 2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2348 struct dlm_lock_resource *res, 2348 struct dlm_lock_resource *res,
2349 int *numlocks) 2349 int *numlocks,
2350 int *hasrefs)
2350{ 2351{
2351 int ret; 2352 int ret;
2352 int i; 2353 int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2356 2357
2357 assert_spin_locked(&res->spinlock); 2358 assert_spin_locked(&res->spinlock);
2358 2359
2360 *numlocks = 0;
2361 *hasrefs = 0;
2362
2359 ret = -EINVAL; 2363 ret = -EINVAL;
2360 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 2364 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
2361 mlog(0, "cannot migrate lockres with unknown owner!\n"); 2365 mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2386 } 2390 }
2387 2391
2388 *numlocks = count; 2392 *numlocks = count;
2389 mlog(0, "migrateable lockres having %d locks\n", *numlocks); 2393
2394 count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2395 if (count < O2NM_MAX_NODES)
2396 *hasrefs = 1;
2397
2398 mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
2399 res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
2390 2400
2391leave: 2401leave:
2392 return ret; 2402 return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2408 const char *name; 2418 const char *name;
2409 unsigned int namelen; 2419 unsigned int namelen;
2410 int mle_added = 0; 2420 int mle_added = 0;
2411 int numlocks; 2421 int numlocks, hasrefs;
2412 int wake = 0; 2422 int wake = 0;
2413 2423
2414 if (!dlm_grab(dlm)) 2424 if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2417 name = res->lockname.name; 2427 name = res->lockname.name;
2418 namelen = res->lockname.len; 2428 namelen = res->lockname.len;
2419 2429
2420 mlog(0, "migrating %.*s to %u\n", namelen, name, target); 2430 mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
2421 2431
2422 /* 2432 /*
2423 * ensure this lockres is a proper candidate for migration 2433 * ensure this lockres is a proper candidate for migration
2424 */ 2434 */
2425 spin_lock(&res->spinlock); 2435 spin_lock(&res->spinlock);
2426 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2436 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2427 if (ret < 0) { 2437 if (ret < 0) {
2428 spin_unlock(&res->spinlock); 2438 spin_unlock(&res->spinlock);
2429 goto leave; 2439 goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2431 spin_unlock(&res->spinlock); 2441 spin_unlock(&res->spinlock);
2432 2442
2433 /* no work to do */ 2443 /* no work to do */
2434 if (numlocks == 0) { 2444 if (numlocks == 0 && !hasrefs)
2435 mlog(0, "no locks were found on this lockres! done!\n");
2436 goto leave; 2445 goto leave;
2437 }
2438 2446
2439 /* 2447 /*
2440 * preallocate up front 2448 * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2459 * find a node to migrate the lockres to 2467 * find a node to migrate the lockres to
2460 */ 2468 */
2461 2469
2462 mlog(0, "picking a migration node\n");
2463 spin_lock(&dlm->spinlock); 2470 spin_lock(&dlm->spinlock);
2464 /* pick a new node */ 2471 /* pick a new node */
2465 if (!test_bit(target, dlm->domain_map) || 2472 if (!test_bit(target, dlm->domain_map) ||
2466 target >= O2NM_MAX_NODES) { 2473 target >= O2NM_MAX_NODES) {
2467 target = dlm_pick_migration_target(dlm, res); 2474 target = dlm_pick_migration_target(dlm, res);
2468 } 2475 }
2469 mlog(0, "node %u chosen for migration\n", target); 2476 mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
2477 namelen, name, target);
2470 2478
2471 if (target >= O2NM_MAX_NODES || 2479 if (target >= O2NM_MAX_NODES ||
2472 !test_bit(target, dlm->domain_map)) { 2480 !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2667{ 2675{
2668 int ret; 2676 int ret;
2669 int lock_dropped = 0; 2677 int lock_dropped = 0;
2670 int numlocks; 2678 int numlocks, hasrefs;
2671 2679
2672 spin_lock(&res->spinlock); 2680 spin_lock(&res->spinlock);
2673 if (res->owner != dlm->node_num) { 2681 if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2681 } 2689 }
2682 2690
2683 /* No need to migrate a lockres having no locks */ 2691 /* No need to migrate a lockres having no locks */
2684 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2692 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2685 if (ret >= 0 && numlocks == 0) { 2693 if (ret >= 0 && numlocks == 0 && !hasrefs) {
2686 spin_unlock(&res->spinlock); 2694 spin_unlock(&res->spinlock);
2687 goto leave; 2695 goto leave;
2688 } 2696 }
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2915 } 2923 }
2916 queue++; 2924 queue++;
2917 } 2925 }
2926
2927 nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2928 if (nodenum < O2NM_MAX_NODES) {
2929 spin_unlock(&res->spinlock);
2930 return nodenum;
2931 }
2918 spin_unlock(&res->spinlock); 2932 spin_unlock(&res->spinlock);
2919 mlog(0, "have not found a suitable target yet! checking domain map\n"); 2933 mlog(0, "have not found a suitable target yet! checking domain map\n");
2920 2934
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04a2809..f6cba566429d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2241,11 +2241,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2241 2241
2242 mutex_lock(&inode->i_mutex); 2242 mutex_lock(&inode->i_mutex);
2243 2243
2244 ocfs2_iocb_clear_sem_locked(iocb);
2245
2244relock: 2246relock:
2245 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2247 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
2246 if (direct_io) { 2248 if (direct_io) {
2247 down_read(&inode->i_alloc_sem); 2249 down_read(&inode->i_alloc_sem);
2248 have_alloc_sem = 1; 2250 have_alloc_sem = 1;
2251 /* communicate with ocfs2_dio_end_io */
2252 ocfs2_iocb_set_sem_locked(iocb);
2249 } 2253 }
2250 2254
2251 /* 2255 /*
@@ -2382,8 +2386,10 @@ out:
2382 ocfs2_rw_unlock(inode, rw_level); 2386 ocfs2_rw_unlock(inode, rw_level);
2383 2387
2384out_sems: 2388out_sems:
2385 if (have_alloc_sem) 2389 if (have_alloc_sem) {
2386 up_read(&inode->i_alloc_sem); 2390 up_read(&inode->i_alloc_sem);
2391 ocfs2_iocb_clear_sem_locked(iocb);
2392 }
2387 2393
2388 mutex_unlock(&inode->i_mutex); 2394 mutex_unlock(&inode->i_mutex);
2389 2395
@@ -2527,6 +2533,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2527 goto bail; 2533 goto bail;
2528 } 2534 }
2529 2535
2536 ocfs2_iocb_clear_sem_locked(iocb);
2537
2530 /* 2538 /*
2531 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2539 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2532 * need locks to protect pending reads from racing with truncate. 2540 * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2542,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2534 if (filp->f_flags & O_DIRECT) { 2542 if (filp->f_flags & O_DIRECT) {
2535 down_read(&inode->i_alloc_sem); 2543 down_read(&inode->i_alloc_sem);
2536 have_alloc_sem = 1; 2544 have_alloc_sem = 1;
2545 ocfs2_iocb_set_sem_locked(iocb);
2537 2546
2538 ret = ocfs2_rw_lock(inode, 0); 2547 ret = ocfs2_rw_lock(inode, 0);
2539 if (ret < 0) { 2548 if (ret < 0) {
@@ -2575,8 +2584,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 } 2584 }
2576 2585
2577bail: 2586bail:
2578 if (have_alloc_sem) 2587 if (have_alloc_sem) {
2579 up_read(&inode->i_alloc_sem); 2588 up_read(&inode->i_alloc_sem);
2589 ocfs2_iocb_clear_sem_locked(iocb);
2590 }
2580 if (rw_level != -1) 2591 if (rw_level != -1)
2581 ocfs2_rw_unlock(inode, rw_level); 2592 ocfs2_rw_unlock(inode, rw_level);
2582 mlog_exit(ret); 2593 mlog_exit(ret);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c2e4f8222e2f..bf2e7764920e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -350,7 +350,7 @@ enum {
350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE 350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
351 NUM_SYSTEM_INODES 351 NUM_SYSTEM_INODES
352}; 352};
353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE 353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
354#define NUM_LOCAL_SYSTEM_INODES \ 354#define NUM_LOCAL_SYSTEM_INODES \
355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) 355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
356 356