about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2009-03-26 18:23:24 -0400
committer David S. Miller <davem@davemloft.net> 2009-03-26 18:23:24 -0400
commit 08abe18af1f78ee80c3c3a5ac47c3e0ae0beadf6 (patch)
tree 2be39bf8942edca1bcec735145e144a682ca9cd3 /fs
parent f0de70f8bb56952f6e016a65a8a8d006918f5bf6 (diff)
parent 0384e2959127a56d0640505d004d8dd92f9c29f5 (diff)
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts: drivers/net/wimax/i2400m/usb-notif.c
Diffstat (limited to 'fs')
-rw-r--r-- fs/aio.c 42
-rw-r--r-- fs/bio-integrity.c 5
-rw-r--r-- fs/bio.c 6
-rw-r--r-- fs/btrfs/ctree.c 10
-rw-r--r-- fs/btrfs/ctree.h 9
-rw-r--r-- fs/btrfs/disk-io.c 4
-rw-r--r-- fs/btrfs/extent-tree.c 49
-rw-r--r-- fs/btrfs/locking.c 6
-rw-r--r-- fs/btrfs/locking.h 2
-rw-r--r-- fs/btrfs/volumes.c 8
-rw-r--r-- fs/buffer.c 23
-rw-r--r-- fs/compat.c 3
-rw-r--r-- fs/devpts/inode.c 5
-rw-r--r-- fs/ecryptfs/crypto.c 51
-rw-r--r-- fs/ecryptfs/ecryptfs_kernel.h 4
-rw-r--r-- fs/ecryptfs/inode.c 32
-rw-r--r-- fs/ecryptfs/keystore.c 3
-rw-r--r-- fs/ecryptfs/main.c 5
-rw-r--r-- fs/exec.c 13
-rw-r--r-- fs/ext4/extents.c 6
-rw-r--r-- fs/ext4/ialloc.c 12
-rw-r--r-- fs/ext4/mballoc.c 13
-rw-r--r-- fs/fat/inode.c 4
-rw-r--r-- fs/file_table.c 2
-rw-r--r-- fs/fs-writeback.c 9
-rw-r--r-- fs/gfs2/Kconfig 17
-rw-r--r-- fs/gfs2/Makefile 4
-rw-r--r-- fs/gfs2/acl.c 1
-rw-r--r-- fs/gfs2/bmap.c 1
-rw-r--r-- fs/gfs2/dir.c 1
-rw-r--r-- fs/gfs2/eaops.c 1
-rw-r--r-- fs/gfs2/eattr.c 1
-rw-r--r-- fs/gfs2/glock.c 268
-rw-r--r-- fs/gfs2/glock.h 127
-rw-r--r-- fs/gfs2/glops.c 160
-rw-r--r-- fs/gfs2/glops.h 1
-rw-r--r-- fs/gfs2/incore.h 71
-rw-r--r-- fs/gfs2/inode.c 13
-rw-r--r-- fs/gfs2/inode.h 22
-rw-r--r-- fs/gfs2/lock_dlm.c 241
-rw-r--r-- fs/gfs2/locking.c 232
-rw-r--r-- fs/gfs2/locking/dlm/Makefile 3
-rw-r--r-- fs/gfs2/locking/dlm/lock.c 708
-rw-r--r-- fs/gfs2/locking/dlm/lock_dlm.h 166
-rw-r--r-- fs/gfs2/locking/dlm/main.c 48
-rw-r--r-- fs/gfs2/locking/dlm/mount.c 276
-rw-r--r-- fs/gfs2/locking/dlm/sysfs.c 226
-rw-r--r-- fs/gfs2/locking/dlm/thread.c 68
-rw-r--r-- fs/gfs2/log.c 1
-rw-r--r-- fs/gfs2/lops.c 1
-rw-r--r-- fs/gfs2/main.c 13
-rw-r--r-- fs/gfs2/meta_io.c 22
-rw-r--r-- fs/gfs2/meta_io.h 1
-rw-r--r-- fs/gfs2/mount.c 128
-rw-r--r-- fs/gfs2/mount.h 17
-rw-r--r-- fs/gfs2/ops_address.c 5
-rw-r--r-- fs/gfs2/ops_dentry.c 1
-rw-r--r-- fs/gfs2/ops_export.c 1
-rw-r--r-- fs/gfs2/ops_file.c 76
-rw-r--r-- fs/gfs2/ops_fstype.c 156
-rw-r--r-- fs/gfs2/ops_inode.c 1
-rw-r--r-- fs/gfs2/ops_super.c 44
-rw-r--r-- fs/gfs2/quota.c 203
-rw-r--r-- fs/gfs2/quota.h 2
-rw-r--r-- fs/gfs2/recovery.c 28
-rw-r--r-- fs/gfs2/rgrp.c 189
-rw-r--r-- fs/gfs2/super.c 3
-rw-r--r-- fs/gfs2/super.h 26
-rw-r--r-- fs/gfs2/sys.c 236
-rw-r--r-- fs/gfs2/trans.c 19
-rw-r--r-- fs/gfs2/util.c 11
-rw-r--r-- fs/inode.c 78
-rw-r--r-- fs/lockd/clntlock.c 51
-rw-r--r-- fs/minix/inode.c 2
-rw-r--r-- fs/namei.c 8
-rw-r--r-- fs/namespace.c 11
-rw-r--r-- fs/nfs/client.c 73
-rw-r--r-- fs/nfs/dir.c 8
-rw-r--r-- fs/nfs/nfs3acl.c 27
-rw-r--r-- fs/nfs/nfs3xdr.c 34
-rw-r--r-- fs/nfs/nfs4namespace.c 15
-rw-r--r-- fs/nfsd/nfs4xdr.c 1
-rw-r--r-- fs/ocfs2/alloc.c 3
-rw-r--r-- fs/ocfs2/aops.c 7
-rw-r--r-- fs/ocfs2/namei.c 3
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 6
-rw-r--r-- fs/ocfs2/xattr.c 30
-rw-r--r-- fs/partitions/check.c 10
-rw-r--r-- fs/pipe.c 8
-rw-r--r-- fs/proc/base.c 16
-rw-r--r-- fs/proc/page.c 2
-rw-r--r-- fs/ramfs/file-nommu.c 4
-rw-r--r-- fs/squashfs/block.c 18
-rw-r--r-- fs/squashfs/cache.c 4
-rw-r--r-- fs/squashfs/inode.c 6
-rw-r--r-- fs/squashfs/squashfs.h 2
-rw-r--r-- fs/squashfs/super.c 2
-rw-r--r-- fs/super.c 5
-rw-r--r-- fs/sysfs/bin.c 253
-rw-r--r-- fs/sysfs/dir.c 33
-rw-r--r-- fs/sysfs/file.c 26
-rw-r--r-- fs/sysfs/inode.c 17
-rw-r--r-- fs/sysfs/mount.c 6
-rw-r--r-- fs/sysfs/sysfs.h 3
-rw-r--r-- fs/ufs/super.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 12
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 10
-rw-r--r-- fs/xfs/xfs_iget.c 15
-rw-r--r-- fs/xfs/xfs_log_recover.c 17
110 files changed, 2251 insertions, 2745 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 8fa77e233944..76da12537956 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -443,7 +443,7 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
443 req->private = NULL; 443 req->private = NULL;
444 req->ki_iovec = NULL; 444 req->ki_iovec = NULL;
445 INIT_LIST_HEAD(&req->ki_run_list); 445 INIT_LIST_HEAD(&req->ki_run_list);
446 req->ki_eventfd = ERR_PTR(-EINVAL); 446 req->ki_eventfd = NULL;
447 447
448 /* Check if the completion queue has enough free space to 448 /* Check if the completion queue has enough free space to
449 * accept an event from this io. 449 * accept an event from this io.
@@ -485,8 +485,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
485{ 485{
486 assert_spin_locked(&ctx->ctx_lock); 486 assert_spin_locked(&ctx->ctx_lock);
487 487
488 if (!IS_ERR(req->ki_eventfd))
489 fput(req->ki_eventfd);
490 if (req->ki_dtor) 488 if (req->ki_dtor)
491 req->ki_dtor(req); 489 req->ki_dtor(req);
492 if (req->ki_iovec != &req->ki_inline_vec) 490 if (req->ki_iovec != &req->ki_inline_vec)
@@ -508,8 +506,11 @@ static void aio_fput_routine(struct work_struct *data)
508 list_del(&req->ki_list); 506 list_del(&req->ki_list);
509 spin_unlock_irq(&fput_lock); 507 spin_unlock_irq(&fput_lock);
510 508
511 /* Complete the fput */ 509 /* Complete the fput(s) */
512 __fput(req->ki_filp); 510 if (req->ki_filp != NULL)
511 __fput(req->ki_filp);
512 if (req->ki_eventfd != NULL)
513 __fput(req->ki_eventfd);
513 514
514 /* Link the iocb into the context's free list */ 515 /* Link the iocb into the context's free list */
515 spin_lock_irq(&ctx->ctx_lock); 516 spin_lock_irq(&ctx->ctx_lock);
@@ -527,12 +528,14 @@ static void aio_fput_routine(struct work_struct *data)
527 */ 528 */
528static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) 529static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
529{ 530{
531 int schedule_putreq = 0;
532
530 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", 533 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
531 req, atomic_long_read(&req->ki_filp->f_count)); 534 req, atomic_long_read(&req->ki_filp->f_count));
532 535
533 assert_spin_locked(&ctx->ctx_lock); 536 assert_spin_locked(&ctx->ctx_lock);
534 537
535 req->ki_users --; 538 req->ki_users--;
536 BUG_ON(req->ki_users < 0); 539 BUG_ON(req->ki_users < 0);
537 if (likely(req->ki_users)) 540 if (likely(req->ki_users))
538 return 0; 541 return 0;
@@ -540,10 +543,23 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
540 req->ki_cancel = NULL; 543 req->ki_cancel = NULL;
541 req->ki_retry = NULL; 544 req->ki_retry = NULL;
542 545
543 /* Must be done under the lock to serialise against cancellation. 546 /*
544 * Call this aio_fput as it duplicates fput via the fput_work. 547 * Try to optimize the aio and eventfd file* puts, by avoiding to
548 * schedule work in case it is not __fput() time. In normal cases,
549 * we would not be holding the last reference to the file*, so
550 * this function will be executed w/out any aio kthread wakeup.
545 */ 551 */
546 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { 552 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
553 schedule_putreq++;
554 else
555 req->ki_filp = NULL;
556 if (req->ki_eventfd != NULL) {
557 if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
558 schedule_putreq++;
559 else
560 req->ki_eventfd = NULL;
561 }
562 if (unlikely(schedule_putreq)) {
547 get_ioctx(ctx); 563 get_ioctx(ctx);
548 spin_lock(&fput_lock); 564 spin_lock(&fput_lock);
549 list_add(&req->ki_list, &fput_head); 565 list_add(&req->ki_list, &fput_head);
@@ -571,7 +587,7 @@ int aio_put_req(struct kiocb *req)
571static struct kioctx *lookup_ioctx(unsigned long ctx_id) 587static struct kioctx *lookup_ioctx(unsigned long ctx_id)
572{ 588{
573 struct mm_struct *mm = current->mm; 589 struct mm_struct *mm = current->mm;
574 struct kioctx *ctx = NULL; 590 struct kioctx *ctx, *ret = NULL;
575 struct hlist_node *n; 591 struct hlist_node *n;
576 592
577 rcu_read_lock(); 593 rcu_read_lock();
@@ -579,12 +595,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
579 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 595 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
580 if (ctx->user_id == ctx_id && !ctx->dead) { 596 if (ctx->user_id == ctx_id && !ctx->dead) {
581 get_ioctx(ctx); 597 get_ioctx(ctx);
598 ret = ctx;
582 break; 599 break;
583 } 600 }
584 } 601 }
585 602
586 rcu_read_unlock(); 603 rcu_read_unlock();
587 return ctx; 604 return ret;
588} 605}
589 606
590/* 607/*
@@ -1009,7 +1026,7 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
1009 * eventfd. The eventfd_signal() function is safe to be called 1026 * eventfd. The eventfd_signal() function is safe to be called
1010 * from IRQ context. 1027 * from IRQ context.
1011 */ 1028 */
1012 if (!IS_ERR(iocb->ki_eventfd)) 1029 if (iocb->ki_eventfd != NULL)
1013 eventfd_signal(iocb->ki_eventfd, 1); 1030 eventfd_signal(iocb->ki_eventfd, 1);
1014 1031
1015put_rq: 1032put_rq:
@@ -1608,6 +1625,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1608 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1625 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
1609 if (IS_ERR(req->ki_eventfd)) { 1626 if (IS_ERR(req->ki_eventfd)) {
1610 ret = PTR_ERR(req->ki_eventfd); 1627 ret = PTR_ERR(req->ki_eventfd);
1628 req->ki_eventfd = NULL;
1611 goto out_put_req; 1629 goto out_put_req;
1612 } 1630 }
1613 } 1631 }
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 549b0144da11..fe2b1aa2464e 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -685,19 +685,20 @@ EXPORT_SYMBOL(bio_integrity_split);
685 * bio_integrity_clone - Callback for cloning bios with integrity metadata 685 * bio_integrity_clone - Callback for cloning bios with integrity metadata
686 * @bio: New bio 686 * @bio: New bio
687 * @bio_src: Original bio 687 * @bio_src: Original bio
688 * @gfp_mask: Memory allocation mask
688 * @bs: bio_set to allocate bip from 689 * @bs: bio_set to allocate bip from
689 * 690 *
690 * Description: Called to allocate a bip when cloning a bio 691 * Description: Called to allocate a bip when cloning a bio
691 */ 692 */
692int bio_integrity_clone(struct bio *bio, struct bio *bio_src, 693int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
693 struct bio_set *bs) 694 gfp_t gfp_mask, struct bio_set *bs)
694{ 695{
695 struct bio_integrity_payload *bip_src = bio_src->bi_integrity; 696 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
696 struct bio_integrity_payload *bip; 697 struct bio_integrity_payload *bip;
697 698
698 BUG_ON(bip_src == NULL); 699 BUG_ON(bip_src == NULL);
699 700
700 bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs); 701 bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
701 702
702 if (bip == NULL) 703 if (bip == NULL)
703 return -EIO; 704 return -EIO;
diff --git a/fs/bio.c b/fs/bio.c
index 124b95c4d582..d4f06327c810 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -463,10 +463,12 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
463 if (bio_integrity(bio)) { 463 if (bio_integrity(bio)) {
464 int ret; 464 int ret;
465 465
466 ret = bio_integrity_clone(b, bio, fs_bio_set); 466 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
467 467
468 if (ret < 0) 468 if (ret < 0) {
469 bio_put(b);
469 return NULL; 470 return NULL;
471 }
470 } 472 }
471 473
472 return b; 474 return b;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 42491d728e99..37f31b5529aa 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -277,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
277 if (*cow_ret == buf) 277 if (*cow_ret == buf)
278 unlock_orig = 1; 278 unlock_orig = 1;
279 279
280 WARN_ON(!btrfs_tree_locked(buf)); 280 btrfs_assert_tree_locked(buf);
281 281
282 if (parent) 282 if (parent)
283 parent_start = parent->start; 283 parent_start = parent->start;
@@ -2365,7 +2365,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2365 if (slot >= btrfs_header_nritems(upper) - 1) 2365 if (slot >= btrfs_header_nritems(upper) - 1)
2366 return 1; 2366 return 1;
2367 2367
2368 WARN_ON(!btrfs_tree_locked(path->nodes[1])); 2368 btrfs_assert_tree_locked(path->nodes[1]);
2369 2369
2370 right = read_node_slot(root, upper, slot + 1); 2370 right = read_node_slot(root, upper, slot + 1);
2371 btrfs_tree_lock(right); 2371 btrfs_tree_lock(right);
@@ -2562,7 +2562,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2562 if (right_nritems == 0) 2562 if (right_nritems == 0)
2563 return 1; 2563 return 1;
2564 2564
2565 WARN_ON(!btrfs_tree_locked(path->nodes[1])); 2565 btrfs_assert_tree_locked(path->nodes[1]);
2566 2566
2567 left = read_node_slot(root, path->nodes[1], slot - 1); 2567 left = read_node_slot(root, path->nodes[1], slot - 1);
2568 btrfs_tree_lock(left); 2568 btrfs_tree_lock(left);
@@ -4101,7 +4101,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4101 4101
4102 next = read_node_slot(root, c, slot); 4102 next = read_node_slot(root, c, slot);
4103 if (!path->skip_locking) { 4103 if (!path->skip_locking) {
4104 WARN_ON(!btrfs_tree_locked(c)); 4104 btrfs_assert_tree_locked(c);
4105 btrfs_tree_lock(next); 4105 btrfs_tree_lock(next);
4106 btrfs_set_lock_blocking(next); 4106 btrfs_set_lock_blocking(next);
4107 } 4107 }
@@ -4126,7 +4126,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4126 reada_for_search(root, path, level, slot, 0); 4126 reada_for_search(root, path, level, slot, 0);
4127 next = read_node_slot(root, next, 0); 4127 next = read_node_slot(root, next, 0);
4128 if (!path->skip_locking) { 4128 if (!path->skip_locking) {
4129 WARN_ON(!btrfs_tree_locked(path->nodes[level])); 4129 btrfs_assert_tree_locked(path->nodes[level]);
4130 btrfs_tree_lock(next); 4130 btrfs_tree_lock(next);
4131 btrfs_set_lock_blocking(next); 4131 btrfs_set_lock_blocking(next);
4132 } 4132 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 82491ba8fa40..5e1d4e30e9d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -784,7 +784,14 @@ struct btrfs_fs_info {
784 struct list_head dirty_cowonly_roots; 784 struct list_head dirty_cowonly_roots;
785 785
786 struct btrfs_fs_devices *fs_devices; 786 struct btrfs_fs_devices *fs_devices;
787
788 /*
789 * the space_info list is almost entirely read only. It only changes
790 * when we add a new raid type to the FS, and that happens
791 * very rarely. RCU is used to protect it.
792 */
787 struct list_head space_info; 793 struct list_head space_info;
794
788 spinlock_t delalloc_lock; 795 spinlock_t delalloc_lock;
789 spinlock_t new_trans_lock; 796 spinlock_t new_trans_lock;
790 u64 delalloc_bytes; 797 u64 delalloc_bytes;
@@ -1797,6 +1804,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
1797int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); 1804int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
1798u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 1805u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
1799void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 1806void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
1807void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
1808
1800int btrfs_check_metadata_free_space(struct btrfs_root *root); 1809int btrfs_check_metadata_free_space(struct btrfs_root *root);
1801int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 1810int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
1802 u64 bytes); 1811 u64 bytes);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index adda739a0215..3e18175248e0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -857,7 +857,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
857 struct inode *btree_inode = root->fs_info->btree_inode; 857 struct inode *btree_inode = root->fs_info->btree_inode;
858 if (btrfs_header_generation(buf) == 858 if (btrfs_header_generation(buf) ==
859 root->fs_info->running_transaction->transid) { 859 root->fs_info->running_transaction->transid) {
860 WARN_ON(!btrfs_tree_locked(buf)); 860 btrfs_assert_tree_locked(buf);
861 861
862 /* ugh, clear_extent_buffer_dirty can be expensive */ 862 /* ugh, clear_extent_buffer_dirty can be expensive */
863 btrfs_set_lock_blocking(buf); 863 btrfs_set_lock_blocking(buf);
@@ -2361,7 +2361,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
2361 2361
2362 btrfs_set_lock_blocking(buf); 2362 btrfs_set_lock_blocking(buf);
2363 2363
2364 WARN_ON(!btrfs_tree_locked(buf)); 2364 btrfs_assert_tree_locked(buf);
2365 if (transid != root->fs_info->generation) { 2365 if (transid != root->fs_info->generation) {
2366 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " 2366 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
2367 "found %llu running %llu\n", 2367 "found %llu running %llu\n",
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6b5966aacf44..fefe83ad2059 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -20,6 +20,7 @@
20#include <linux/writeback.h> 20#include <linux/writeback.h>
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/sort.h> 22#include <linux/sort.h>
23#include <linux/rcupdate.h>
23#include "compat.h" 24#include "compat.h"
24#include "hash.h" 25#include "hash.h"
25#include "crc32c.h" 26#include "crc32c.h"
@@ -330,13 +331,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
330{ 331{
331 struct list_head *head = &info->space_info; 332 struct list_head *head = &info->space_info;
332 struct btrfs_space_info *found; 333 struct btrfs_space_info *found;
333 list_for_each_entry(found, head, list) { 334
334 if (found->flags == flags) 335 rcu_read_lock();
336 list_for_each_entry_rcu(found, head, list) {
337 if (found->flags == flags) {
338 rcu_read_unlock();
335 return found; 339 return found;
340 }
336 } 341 }
342 rcu_read_unlock();
337 return NULL; 343 return NULL;
338} 344}
339 345
346/*
347 * after adding space to the filesystem, we need to clear the full flags
348 * on all the space infos.
349 */
350void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
351{
352 struct list_head *head = &info->space_info;
353 struct btrfs_space_info *found;
354
355 rcu_read_lock();
356 list_for_each_entry_rcu(found, head, list)
357 found->full = 0;
358 rcu_read_unlock();
359}
360
340static u64 div_factor(u64 num, int factor) 361static u64 div_factor(u64 num, int factor)
341{ 362{
342 if (factor == 10) 363 if (factor == 10)
@@ -1903,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1903 if (!found) 1924 if (!found)
1904 return -ENOMEM; 1925 return -ENOMEM;
1905 1926
1906 list_add(&found->list, &info->space_info);
1907 INIT_LIST_HEAD(&found->block_groups); 1927 INIT_LIST_HEAD(&found->block_groups);
1908 init_rwsem(&found->groups_sem); 1928 init_rwsem(&found->groups_sem);
1909 spin_lock_init(&found->lock); 1929 spin_lock_init(&found->lock);
@@ -1917,6 +1937,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1917 found->full = 0; 1937 found->full = 0;
1918 found->force_alloc = 0; 1938 found->force_alloc = 0;
1919 *space_info = found; 1939 *space_info = found;
1940 list_add_rcu(&found->list, &info->space_info);
1920 return 0; 1941 return 0;
1921} 1942}
1922 1943
@@ -4418,13 +4439,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4418 path = btrfs_alloc_path(); 4439 path = btrfs_alloc_path();
4419 BUG_ON(!path); 4440 BUG_ON(!path);
4420 4441
4421 BUG_ON(!btrfs_tree_locked(parent)); 4442 btrfs_assert_tree_locked(parent);
4422 parent_level = btrfs_header_level(parent); 4443 parent_level = btrfs_header_level(parent);
4423 extent_buffer_get(parent); 4444 extent_buffer_get(parent);
4424 path->nodes[parent_level] = parent; 4445 path->nodes[parent_level] = parent;
4425 path->slots[parent_level] = btrfs_header_nritems(parent); 4446 path->slots[parent_level] = btrfs_header_nritems(parent);
4426 4447
4427 BUG_ON(!btrfs_tree_locked(node)); 4448 btrfs_assert_tree_locked(node);
4428 level = btrfs_header_level(node); 4449 level = btrfs_header_level(node);
4429 extent_buffer_get(node); 4450 extent_buffer_get(node);
4430 path->nodes[level] = node; 4451 path->nodes[level] = node;
@@ -6320,6 +6341,7 @@ out:
6320int btrfs_free_block_groups(struct btrfs_fs_info *info) 6341int btrfs_free_block_groups(struct btrfs_fs_info *info)
6321{ 6342{
6322 struct btrfs_block_group_cache *block_group; 6343 struct btrfs_block_group_cache *block_group;
6344 struct btrfs_space_info *space_info;
6323 struct rb_node *n; 6345 struct rb_node *n;
6324 6346
6325 spin_lock(&info->block_group_cache_lock); 6347 spin_lock(&info->block_group_cache_lock);
@@ -6341,6 +6363,23 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
6341 spin_lock(&info->block_group_cache_lock); 6363 spin_lock(&info->block_group_cache_lock);
6342 } 6364 }
6343 spin_unlock(&info->block_group_cache_lock); 6365 spin_unlock(&info->block_group_cache_lock);
6366
6367 /* now that all the block groups are freed, go through and
6368 * free all the space_info structs. This is only called during
6369 * the final stages of unmount, and so we know nobody is
6370 * using them. We call synchronize_rcu() once before we start,
6371 * just to be on the safe side.
6372 */
6373 synchronize_rcu();
6374
6375 while(!list_empty(&info->space_info)) {
6376 space_info = list_entry(info->space_info.next,
6377 struct btrfs_space_info,
6378 list);
6379
6380 list_del(&space_info->list);
6381 kfree(space_info);
6382 }
6344 return 0; 6383 return 0;
6345} 6384}
6346 6385
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 85506c4a3af7..47b0a88c12a2 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -220,8 +220,8 @@ int btrfs_tree_unlock(struct extent_buffer *eb)
220 return 0; 220 return 0;
221} 221}
222 222
223int btrfs_tree_locked(struct extent_buffer *eb) 223void btrfs_assert_tree_locked(struct extent_buffer *eb)
224{ 224{
225 return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || 225 if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
226 spin_is_locked(&eb->lock); 226 assert_spin_locked(&eb->lock);
227} 227}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 6bb0afbff928..6c4ce457168c 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -21,11 +21,11 @@
21 21
22int btrfs_tree_lock(struct extent_buffer *eb); 22int btrfs_tree_lock(struct extent_buffer *eb);
23int btrfs_tree_unlock(struct extent_buffer *eb); 23int btrfs_tree_unlock(struct extent_buffer *eb);
24int btrfs_tree_locked(struct extent_buffer *eb);
25 24
26int btrfs_try_tree_lock(struct extent_buffer *eb); 25int btrfs_try_tree_lock(struct extent_buffer *eb);
27int btrfs_try_spin_lock(struct extent_buffer *eb); 26int btrfs_try_spin_lock(struct extent_buffer *eb);
28 27
29void btrfs_set_lock_blocking(struct extent_buffer *eb); 28void btrfs_set_lock_blocking(struct extent_buffer *eb);
30void btrfs_clear_lock_blocking(struct extent_buffer *eb); 29void btrfs_clear_lock_blocking(struct extent_buffer *eb);
30void btrfs_assert_tree_locked(struct extent_buffer *eb);
31#endif 31#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1316139bf9e8..dd06e18e5aac 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1374,6 +1374,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1374 ret = btrfs_add_device(trans, root, device); 1374 ret = btrfs_add_device(trans, root, device);
1375 } 1375 }
1376 1376
1377 /*
1378 * we've got more storage, clear any full flags on the space
1379 * infos
1380 */
1381 btrfs_clear_space_info_full(root->fs_info);
1382
1377 unlock_chunks(root); 1383 unlock_chunks(root);
1378 btrfs_commit_transaction(trans, root); 1384 btrfs_commit_transaction(trans, root);
1379 1385
@@ -1459,6 +1465,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
1459 device->fs_devices->total_rw_bytes += diff; 1465 device->fs_devices->total_rw_bytes += diff;
1460 1466
1461 device->total_bytes = new_size; 1467 device->total_bytes = new_size;
1468 btrfs_clear_space_info_full(device->dev_root->fs_info);
1469
1462 return btrfs_update_device(trans, device); 1470 return btrfs_update_device(trans, device);
1463} 1471}
1464 1472
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f697419ed8e..891e1c78e4f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -760,15 +760,9 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
760 * If warn is true, then emit a warning if the page is not uptodate and has 760 * If warn is true, then emit a warning if the page is not uptodate and has
761 * not been truncated. 761 * not been truncated.
762 */ 762 */
763static int __set_page_dirty(struct page *page, 763static void __set_page_dirty(struct page *page,
764 struct address_space *mapping, int warn) 764 struct address_space *mapping, int warn)
765{ 765{
766 if (unlikely(!mapping))
767 return !TestSetPageDirty(page);
768
769 if (TestSetPageDirty(page))
770 return 0;
771
772 spin_lock_irq(&mapping->tree_lock); 766 spin_lock_irq(&mapping->tree_lock);
773 if (page->mapping) { /* Race with truncate? */ 767 if (page->mapping) { /* Race with truncate? */
774 WARN_ON_ONCE(warn && !PageUptodate(page)); 768 WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -785,8 +779,6 @@ static int __set_page_dirty(struct page *page,
785 } 779 }
786 spin_unlock_irq(&mapping->tree_lock); 780 spin_unlock_irq(&mapping->tree_lock);
787 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 781 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
788
789 return 1;
790} 782}
791 783
792/* 784/*
@@ -816,6 +808,7 @@ static int __set_page_dirty(struct page *page,
816 */ 808 */
817int __set_page_dirty_buffers(struct page *page) 809int __set_page_dirty_buffers(struct page *page)
818{ 810{
811 int newly_dirty;
819 struct address_space *mapping = page_mapping(page); 812 struct address_space *mapping = page_mapping(page);
820 813
821 if (unlikely(!mapping)) 814 if (unlikely(!mapping))
@@ -831,9 +824,12 @@ int __set_page_dirty_buffers(struct page *page)
831 bh = bh->b_this_page; 824 bh = bh->b_this_page;
832 } while (bh != head); 825 } while (bh != head);
833 } 826 }
827 newly_dirty = !TestSetPageDirty(page);
834 spin_unlock(&mapping->private_lock); 828 spin_unlock(&mapping->private_lock);
835 829
836 return __set_page_dirty(page, mapping, 1); 830 if (newly_dirty)
831 __set_page_dirty(page, mapping, 1);
832 return newly_dirty;
837} 833}
838EXPORT_SYMBOL(__set_page_dirty_buffers); 834EXPORT_SYMBOL(__set_page_dirty_buffers);
839 835
@@ -1262,8 +1258,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
1262 return; 1258 return;
1263 } 1259 }
1264 1260
1265 if (!test_set_buffer_dirty(bh)) 1261 if (!test_set_buffer_dirty(bh)) {
1266 __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0); 1262 struct page *page = bh->b_page;
1263 if (!TestSetPageDirty(page))
1264 __set_page_dirty(page, page_mapping(page), 0);
1265 }
1267} 1266}
1268 1267
1269/* 1268/*
diff --git a/fs/compat.c b/fs/compat.c
index d0145ca27572..0949b43794a4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1402,6 +1402,7 @@ int compat_do_execve(char * filename,
1402 retval = mutex_lock_interruptible(&current->cred_exec_mutex); 1402 retval = mutex_lock_interruptible(&current->cred_exec_mutex);
1403 if (retval < 0) 1403 if (retval < 0)
1404 goto out_free; 1404 goto out_free;
1405 current->in_execve = 1;
1405 1406
1406 retval = -ENOMEM; 1407 retval = -ENOMEM;
1407 bprm->cred = prepare_exec_creds(); 1408 bprm->cred = prepare_exec_creds();
@@ -1454,6 +1455,7 @@ int compat_do_execve(char * filename,
1454 goto out; 1455 goto out;
1455 1456
1456 /* execve succeeded */ 1457 /* execve succeeded */
1458 current->in_execve = 0;
1457 mutex_unlock(&current->cred_exec_mutex); 1459 mutex_unlock(&current->cred_exec_mutex);
1458 acct_update_integrals(current); 1460 acct_update_integrals(current);
1459 free_bprm(bprm); 1461 free_bprm(bprm);
@@ -1470,6 +1472,7 @@ out_file:
1470 } 1472 }
1471 1473
1472out_unlock: 1474out_unlock:
1475 current->in_execve = 0;
1473 mutex_unlock(&current->cred_exec_mutex); 1476 mutex_unlock(&current->cred_exec_mutex);
1474 1477
1475out_free: 1478out_free:
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5f3231b9633f..bff4052b05e7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -198,9 +198,6 @@ static int mknod_ptmx(struct super_block *sb)
198 198
199 fsi->ptmx_dentry = dentry; 199 fsi->ptmx_dentry = dentry;
200 rc = 0; 200 rc = 0;
201
202 printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n",
203 inode->i_ino);
204out: 201out:
205 mutex_unlock(&root->d_inode->i_mutex); 202 mutex_unlock(&root->d_inode->i_mutex);
206 return rc; 203 return rc;
@@ -369,8 +366,6 @@ static int new_pts_mount(struct file_system_type *fs_type, int flags,
369 struct pts_fs_info *fsi; 366 struct pts_fs_info *fsi;
370 struct pts_mount_opts *opts; 367 struct pts_mount_opts *opts;
371 368
372 printk(KERN_NOTICE "devpts: newinstance mount\n");
373
374 err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt); 369 err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt);
375 if (err) 370 if (err)
376 return err; 371 return err;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f6caeb1d1106..8b65f289ee00 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -946,6 +946,8 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
946 list_for_each_entry(global_auth_tok, 946 list_for_each_entry(global_auth_tok,
947 &mount_crypt_stat->global_auth_tok_list, 947 &mount_crypt_stat->global_auth_tok_list,
948 mount_crypt_stat_list) { 948 mount_crypt_stat_list) {
949 if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_FNEK)
950 continue;
949 rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig); 951 rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
950 if (rc) { 952 if (rc) {
951 printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc); 953 printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc);
@@ -1322,14 +1324,13 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
1322} 1324}
1323 1325
1324static int 1326static int
1325ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, 1327ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
1326 struct dentry *ecryptfs_dentry, 1328 char *virt, size_t virt_len)
1327 char *virt)
1328{ 1329{
1329 int rc; 1330 int rc;
1330 1331
1331 rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, 1332 rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
1332 0, crypt_stat->num_header_bytes_at_front); 1333 0, virt_len);
1333 if (rc) 1334 if (rc)
1334 printk(KERN_ERR "%s: Error attempting to write header " 1335 printk(KERN_ERR "%s: Error attempting to write header "
1335 "information to lower file; rc = [%d]\n", __func__, 1336 "information to lower file; rc = [%d]\n", __func__,
@@ -1339,7 +1340,6 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1339 1340
1340static int 1341static int
1341ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, 1342ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
1342 struct ecryptfs_crypt_stat *crypt_stat,
1343 char *page_virt, size_t size) 1343 char *page_virt, size_t size)
1344{ 1344{
1345 int rc; 1345 int rc;
@@ -1349,6 +1349,17 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
1349 return rc; 1349 return rc;
1350} 1350}
1351 1351
1352static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
1353 unsigned int order)
1354{
1355 struct page *page;
1356
1357 page = alloc_pages(gfp_mask | __GFP_ZERO, order);
1358 if (page)
1359 return (unsigned long) page_address(page);
1360 return 0;
1361}
1362
1352/** 1363/**
1353 * ecryptfs_write_metadata 1364 * ecryptfs_write_metadata
1354 * @ecryptfs_dentry: The eCryptfs dentry 1365 * @ecryptfs_dentry: The eCryptfs dentry
@@ -1365,7 +1376,9 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1365{ 1376{
1366 struct ecryptfs_crypt_stat *crypt_stat = 1377 struct ecryptfs_crypt_stat *crypt_stat =
1367 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; 1378 &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
1379 unsigned int order;
1368 char *virt; 1380 char *virt;
1381 size_t virt_len;
1369 size_t size = 0; 1382 size_t size = 0;
1370 int rc = 0; 1383 int rc = 0;
1371 1384
@@ -1381,33 +1394,35 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1381 rc = -EINVAL; 1394 rc = -EINVAL;
1382 goto out; 1395 goto out;
1383 } 1396 }
1397 virt_len = crypt_stat->num_header_bytes_at_front;
1398 order = get_order(virt_len);
1384 /* Released in this function */ 1399 /* Released in this function */
1385 virt = (char *)get_zeroed_page(GFP_KERNEL); 1400 virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
1386 if (!virt) { 1401 if (!virt) {
1387 printk(KERN_ERR "%s: Out of memory\n", __func__); 1402 printk(KERN_ERR "%s: Out of memory\n", __func__);
1388 rc = -ENOMEM; 1403 rc = -ENOMEM;
1389 goto out; 1404 goto out;
1390 } 1405 }
1391 rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size, 1406 rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat,
1392 crypt_stat, ecryptfs_dentry); 1407 ecryptfs_dentry);
1393 if (unlikely(rc)) { 1408 if (unlikely(rc)) {
1394 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1409 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1395 __func__, rc); 1410 __func__, rc);
1396 goto out_free; 1411 goto out_free;
1397 } 1412 }
1398 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 1413 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
1399 rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, 1414 rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt,
1400 crypt_stat, virt, size); 1415 size);
1401 else 1416 else
1402 rc = ecryptfs_write_metadata_to_contents(crypt_stat, 1417 rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt,
1403 ecryptfs_dentry, virt); 1418 virt_len);
1404 if (rc) { 1419 if (rc) {
1405 printk(KERN_ERR "%s: Error writing metadata out to lower file; " 1420 printk(KERN_ERR "%s: Error writing metadata out to lower file; "
1406 "rc = [%d]\n", __func__, rc); 1421 "rc = [%d]\n", __func__, rc);
1407 goto out_free; 1422 goto out_free;
1408 } 1423 }
1409out_free: 1424out_free:
1410 free_page((unsigned long)virt); 1425 free_pages((unsigned long)virt, order);
1411out: 1426out:
1412 return rc; 1427 return rc;
1413} 1428}
@@ -2206,17 +2221,19 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
2206 struct dentry *ecryptfs_dir_dentry, 2221 struct dentry *ecryptfs_dir_dentry,
2207 const char *name, size_t name_size) 2222 const char *name, size_t name_size)
2208{ 2223{
2224 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
2225 &ecryptfs_superblock_to_private(
2226 ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
2209 char *decoded_name; 2227 char *decoded_name;
2210 size_t decoded_name_size; 2228 size_t decoded_name_size;
2211 size_t packet_size; 2229 size_t packet_size;
2212 int rc = 0; 2230 int rc = 0;
2213 2231
2214 if ((name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) 2232 if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
2233 && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
2234 && (name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
2215 && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, 2235 && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
2216 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) { 2236 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) {
2217 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
2218 &ecryptfs_superblock_to_private(
2219 ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
2220 const char *orig_name = name; 2237 const char *orig_name = name;
2221 size_t orig_name_size = name_size; 2238 size_t orig_name_size = name_size;
2222 2239
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c11fc95714ab..ac749d4d644f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -328,6 +328,7 @@ struct ecryptfs_dentry_info {
328 */ 328 */
329struct ecryptfs_global_auth_tok { 329struct ecryptfs_global_auth_tok {
330#define ECRYPTFS_AUTH_TOK_INVALID 0x00000001 330#define ECRYPTFS_AUTH_TOK_INVALID 0x00000001
331#define ECRYPTFS_AUTH_TOK_FNEK 0x00000002
331 u32 flags; 332 u32 flags;
332 struct list_head mount_crypt_stat_list; 333 struct list_head mount_crypt_stat_list;
333 struct key *global_auth_tok_key; 334 struct key *global_auth_tok_key;
@@ -619,7 +620,6 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
619 u32 flags); 620 u32 flags);
620int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 621int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
621 struct dentry *lower_dentry, 622 struct dentry *lower_dentry,
622 struct ecryptfs_crypt_stat *crypt_stat,
623 struct inode *ecryptfs_dir_inode, 623 struct inode *ecryptfs_dir_inode,
624 struct nameidata *ecryptfs_nd); 624 struct nameidata *ecryptfs_nd);
625int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, 625int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
@@ -696,7 +696,7 @@ ecryptfs_write_header_metadata(char *virt,
696int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig); 696int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig);
697int 697int
698ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 698ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
699 char *sig); 699 char *sig, u32 global_auth_tok_flags);
700int ecryptfs_get_global_auth_tok_for_sig( 700int ecryptfs_get_global_auth_tok_for_sig(
701 struct ecryptfs_global_auth_tok **global_auth_tok, 701 struct ecryptfs_global_auth_tok **global_auth_tok,
702 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig); 702 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5697899a168d..55b3145b8072 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -246,7 +246,6 @@ out:
246 */ 246 */
247int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 247int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
248 struct dentry *lower_dentry, 248 struct dentry *lower_dentry,
249 struct ecryptfs_crypt_stat *crypt_stat,
250 struct inode *ecryptfs_dir_inode, 249 struct inode *ecryptfs_dir_inode,
251 struct nameidata *ecryptfs_nd) 250 struct nameidata *ecryptfs_nd)
252{ 251{
@@ -254,6 +253,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
254 struct vfsmount *lower_mnt; 253 struct vfsmount *lower_mnt;
255 struct inode *lower_inode; 254 struct inode *lower_inode;
256 struct ecryptfs_mount_crypt_stat *mount_crypt_stat; 255 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
256 struct ecryptfs_crypt_stat *crypt_stat;
257 char *page_virt = NULL; 257 char *page_virt = NULL;
258 u64 file_size; 258 u64 file_size;
259 int rc = 0; 259 int rc = 0;
@@ -314,6 +314,11 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
314 goto out_free_kmem; 314 goto out_free_kmem;
315 } 315 }
316 } 316 }
317 crypt_stat = &ecryptfs_inode_to_private(
318 ecryptfs_dentry->d_inode)->crypt_stat;
319 /* TODO: lock for crypt_stat comparison */
320 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
321 ecryptfs_set_default_sizes(crypt_stat);
317 rc = ecryptfs_read_and_validate_header_region(page_virt, 322 rc = ecryptfs_read_and_validate_header_region(page_virt,
318 ecryptfs_dentry->d_inode); 323 ecryptfs_dentry->d_inode);
319 if (rc) { 324 if (rc) {
@@ -362,9 +367,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
362{ 367{
363 char *encrypted_and_encoded_name = NULL; 368 char *encrypted_and_encoded_name = NULL;
364 size_t encrypted_and_encoded_name_size; 369 size_t encrypted_and_encoded_name_size;
365 struct ecryptfs_crypt_stat *crypt_stat = NULL;
366 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; 370 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
367 struct ecryptfs_inode_info *inode_info;
368 struct dentry *lower_dir_dentry, *lower_dentry; 371 struct dentry *lower_dir_dentry, *lower_dentry;
369 int rc = 0; 372 int rc = 0;
370 373
@@ -388,26 +391,15 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
388 } 391 }
389 if (lower_dentry->d_inode) 392 if (lower_dentry->d_inode)
390 goto lookup_and_interpose; 393 goto lookup_and_interpose;
391 inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 394 mount_crypt_stat = &ecryptfs_superblock_to_private(
392 if (inode_info) { 395 ecryptfs_dentry->d_sb)->mount_crypt_stat;
393 crypt_stat = &inode_info->crypt_stat; 396 if (!(mount_crypt_stat
394 /* TODO: lock for crypt_stat comparison */ 397 && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
395 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
396 ecryptfs_set_default_sizes(crypt_stat);
397 }
398 if (crypt_stat)
399 mount_crypt_stat = crypt_stat->mount_crypt_stat;
400 else
401 mount_crypt_stat = &ecryptfs_superblock_to_private(
402 ecryptfs_dentry->d_sb)->mount_crypt_stat;
403 if (!(crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
404 && !(mount_crypt_stat && (mount_crypt_stat->flags
405 & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
406 goto lookup_and_interpose; 398 goto lookup_and_interpose;
407 dput(lower_dentry); 399 dput(lower_dentry);
408 rc = ecryptfs_encrypt_and_encode_filename( 400 rc = ecryptfs_encrypt_and_encode_filename(
409 &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, 401 &encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
410 crypt_stat, mount_crypt_stat, ecryptfs_dentry->d_name.name, 402 NULL, mount_crypt_stat, ecryptfs_dentry->d_name.name,
411 ecryptfs_dentry->d_name.len); 403 ecryptfs_dentry->d_name.len);
412 if (rc) { 404 if (rc) {
413 printk(KERN_ERR "%s: Error attempting to encrypt and encode " 405 printk(KERN_ERR "%s: Error attempting to encrypt and encode "
@@ -426,7 +418,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
426 } 418 }
427lookup_and_interpose: 419lookup_and_interpose:
428 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, 420 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
429 crypt_stat, ecryptfs_dir_inode, 421 ecryptfs_dir_inode,
430 ecryptfs_nd); 422 ecryptfs_nd);
431 goto out; 423 goto out;
432out_d_drop: 424out_d_drop:
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ff539420cc6f..e4a6223c3145 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -2375,7 +2375,7 @@ struct kmem_cache *ecryptfs_global_auth_tok_cache;
2375 2375
2376int 2376int
2377ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 2377ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
2378 char *sig) 2378 char *sig, u32 global_auth_tok_flags)
2379{ 2379{
2380 struct ecryptfs_global_auth_tok *new_auth_tok; 2380 struct ecryptfs_global_auth_tok *new_auth_tok;
2381 int rc = 0; 2381 int rc = 0;
@@ -2389,6 +2389,7 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
2389 goto out; 2389 goto out;
2390 } 2390 }
2391 memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX); 2391 memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX);
2392 new_auth_tok->flags = global_auth_tok_flags;
2392 new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0'; 2393 new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
2393 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); 2394 mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
2394 list_add(&new_auth_tok->mount_crypt_stat_list, 2395 list_add(&new_auth_tok->mount_crypt_stat_list,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 789cf2e1be1e..aed56c25539b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -319,7 +319,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
319 case ecryptfs_opt_ecryptfs_sig: 319 case ecryptfs_opt_ecryptfs_sig:
320 sig_src = args[0].from; 320 sig_src = args[0].from;
321 rc = ecryptfs_add_global_auth_tok(mount_crypt_stat, 321 rc = ecryptfs_add_global_auth_tok(mount_crypt_stat,
322 sig_src); 322 sig_src, 0);
323 if (rc) { 323 if (rc) {
324 printk(KERN_ERR "Error attempting to register " 324 printk(KERN_ERR "Error attempting to register "
325 "global sig; rc = [%d]\n", rc); 325 "global sig; rc = [%d]\n", rc);
@@ -370,7 +370,8 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
370 ECRYPTFS_SIG_SIZE_HEX] = '\0'; 370 ECRYPTFS_SIG_SIZE_HEX] = '\0';
371 rc = ecryptfs_add_global_auth_tok( 371 rc = ecryptfs_add_global_auth_tok(
372 mount_crypt_stat, 372 mount_crypt_stat,
373 mount_crypt_stat->global_default_fnek_sig); 373 mount_crypt_stat->global_default_fnek_sig,
374 ECRYPTFS_AUTH_TOK_FNEK);
374 if (rc) { 375 if (rc) {
375 printk(KERN_ERR "Error attempting to register " 376 printk(KERN_ERR "Error attempting to register "
376 "global fnek sig [%s]; rc = [%d]\n", 377 "global fnek sig [%s]; rc = [%d]\n",
diff --git a/fs/exec.c b/fs/exec.c
index 929b58004b7e..b9f1c144b7a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -45,6 +45,7 @@
45#include <linux/proc_fs.h> 45#include <linux/proc_fs.h>
46#include <linux/mount.h> 46#include <linux/mount.h>
47#include <linux/security.h> 47#include <linux/security.h>
48#include <linux/ima.h>
48#include <linux/syscalls.h> 49#include <linux/syscalls.h>
49#include <linux/tsacct_kern.h> 50#include <linux/tsacct_kern.h>
50#include <linux/cn_proc.h> 51#include <linux/cn_proc.h>
@@ -127,6 +128,9 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
127 MAY_READ | MAY_EXEC | MAY_OPEN); 128 MAY_READ | MAY_EXEC | MAY_OPEN);
128 if (error) 129 if (error)
129 goto exit; 130 goto exit;
131 error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN);
132 if (error)
133 goto exit;
130 134
131 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); 135 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
132 error = PTR_ERR(file); 136 error = PTR_ERR(file);
@@ -674,6 +678,9 @@ struct file *open_exec(const char *name)
674 err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); 678 err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
675 if (err) 679 if (err)
676 goto out_path_put; 680 goto out_path_put;
681 err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN);
682 if (err)
683 goto out_path_put;
677 684
678 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); 685 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
679 if (IS_ERR(file)) 686 if (IS_ERR(file))
@@ -1184,6 +1191,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1184 retval = security_bprm_check(bprm); 1191 retval = security_bprm_check(bprm);
1185 if (retval) 1192 if (retval)
1186 return retval; 1193 return retval;
1194 retval = ima_bprm_check(bprm);
1195 if (retval)
1196 return retval;
1187 1197
1188 /* kernel module loader fixup */ 1198 /* kernel module loader fixup */
1189 /* so we don't try to load run modprobe in kernel space. */ 1199 /* so we don't try to load run modprobe in kernel space. */
@@ -1284,6 +1294,7 @@ int do_execve(char * filename,
1284 retval = mutex_lock_interruptible(&current->cred_exec_mutex); 1294 retval = mutex_lock_interruptible(&current->cred_exec_mutex);
1285 if (retval < 0) 1295 if (retval < 0)
1286 goto out_free; 1296 goto out_free;
1297 current->in_execve = 1;
1287 1298
1288 retval = -ENOMEM; 1299 retval = -ENOMEM;
1289 bprm->cred = prepare_exec_creds(); 1300 bprm->cred = prepare_exec_creds();
@@ -1337,6 +1348,7 @@ int do_execve(char * filename,
1337 goto out; 1348 goto out;
1338 1349
1339 /* execve succeeded */ 1350 /* execve succeeded */
1351 current->in_execve = 0;
1340 mutex_unlock(&current->cred_exec_mutex); 1352 mutex_unlock(&current->cred_exec_mutex);
1341 acct_update_integrals(current); 1353 acct_update_integrals(current);
1342 free_bprm(bprm); 1354 free_bprm(bprm);
@@ -1355,6 +1367,7 @@ out_file:
1355 } 1367 }
1356 1368
1357out_unlock: 1369out_unlock:
1370 current->in_execve = 0;
1358 mutex_unlock(&current->cred_exec_mutex); 1371 mutex_unlock(&current->cred_exec_mutex);
1359 1372
1360out_free: 1373out_free:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e2eab196875f..e0aa4fe4f596 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1122,7 +1122,8 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
1122 struct ext4_extent_idx *ix; 1122 struct ext4_extent_idx *ix;
1123 struct ext4_extent *ex; 1123 struct ext4_extent *ex;
1124 ext4_fsblk_t block; 1124 ext4_fsblk_t block;
1125 int depth, ee_len; 1125 int depth; /* Note, NOT eh_depth; depth from top of tree */
1126 int ee_len;
1126 1127
1127 BUG_ON(path == NULL); 1128 BUG_ON(path == NULL);
1128 depth = path->p_depth; 1129 depth = path->p_depth;
@@ -1179,7 +1180,8 @@ got_index:
1179 if (bh == NULL) 1180 if (bh == NULL)
1180 return -EIO; 1181 return -EIO;
1181 eh = ext_block_hdr(bh); 1182 eh = ext_block_hdr(bh);
1182 if (ext4_ext_check_header(inode, eh, depth)) { 1183 /* subtract from p_depth to get proper eh_depth */
1184 if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
1183 put_bh(bh); 1185 put_bh(bh);
1184 return -EIO; 1186 return -EIO;
1185 } 1187 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18a919be70b..2d2b3585ee91 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
188 struct ext4_group_desc *gdp; 188 struct ext4_group_desc *gdp;
189 struct ext4_super_block *es; 189 struct ext4_super_block *es;
190 struct ext4_sb_info *sbi; 190 struct ext4_sb_info *sbi;
191 int fatal = 0, err, count; 191 int fatal = 0, err, count, cleared;
192 ext4_group_t flex_group; 192 ext4_group_t flex_group;
193 193
194 if (atomic_read(&inode->i_count) > 1) { 194 if (atomic_read(&inode->i_count) > 1) {
@@ -248,8 +248,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
248 goto error_return; 248 goto error_return;
249 249
250 /* Ok, now we can actually update the inode bitmaps.. */ 250 /* Ok, now we can actually update the inode bitmaps.. */
251 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 251 spin_lock(sb_bgl_lock(sbi, block_group));
252 bit, bitmap_bh->b_data)) 252 cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
253 spin_unlock(sb_bgl_lock(sbi, block_group));
254 if (!cleared)
253 ext4_error(sb, "ext4_free_inode", 255 ext4_error(sb, "ext4_free_inode",
254 "bit already cleared for inode %lu", ino); 256 "bit already cleared for inode %lu", ino);
255 else { 257 else {
@@ -696,6 +698,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
696 struct inode *ret; 698 struct inode *ret;
697 ext4_group_t i; 699 ext4_group_t i;
698 int free = 0; 700 int free = 0;
701 static int once = 1;
699 ext4_group_t flex_group; 702 ext4_group_t flex_group;
700 703
701 /* Cannot create files in a deleted directory */ 704 /* Cannot create files in a deleted directory */
@@ -717,7 +720,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
717 ret2 = find_group_flex(sb, dir, &group); 720 ret2 = find_group_flex(sb, dir, &group);
718 if (ret2 == -1) { 721 if (ret2 == -1) {
719 ret2 = find_group_other(sb, dir, &group); 722 ret2 = find_group_other(sb, dir, &group);
720 if (ret2 == 0 && printk_ratelimit()) 723 if (ret2 == 0 && once)
724 once = 0;
721 printk(KERN_NOTICE "ext4: find_group_flex " 725 printk(KERN_NOTICE "ext4: find_group_flex "
722 "failed, fallback succeeded dir %lu\n", 726 "failed, fallback succeeded dir %lu\n",
723 dir->i_ino); 727 dir->i_ino);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4415beeb0b62..9f61e62f435f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1447,7 +1447,7 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1447 struct ext4_free_extent *gex = &ac->ac_g_ex; 1447 struct ext4_free_extent *gex = &ac->ac_g_ex;
1448 1448
1449 BUG_ON(ex->fe_len <= 0); 1449 BUG_ON(ex->fe_len <= 0);
1450 BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1450 BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1451 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1451 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1452 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); 1452 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1453 1453
@@ -3292,7 +3292,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3292 } 3292 }
3293 BUG_ON(start + size <= ac->ac_o_ex.fe_logical && 3293 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3294 start > ac->ac_o_ex.fe_logical); 3294 start > ac->ac_o_ex.fe_logical);
3295 BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 3295 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3296 3296
3297 /* now prepare goal request */ 3297 /* now prepare goal request */
3298 3298
@@ -3589,6 +3589,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3589 struct super_block *sb, struct ext4_prealloc_space *pa) 3589 struct super_block *sb, struct ext4_prealloc_space *pa)
3590{ 3590{
3591 ext4_group_t grp; 3591 ext4_group_t grp;
3592 ext4_fsblk_t grp_blk;
3592 3593
3593 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) 3594 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
3594 return; 3595 return;
@@ -3603,8 +3604,12 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3603 pa->pa_deleted = 1; 3604 pa->pa_deleted = 1;
3604 spin_unlock(&pa->pa_lock); 3605 spin_unlock(&pa->pa_lock);
3605 3606
3606 /* -1 is to protect from crossing allocation group */ 3607 grp_blk = pa->pa_pstart;
3607 ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL); 3608 /* If linear, pa_pstart may be in the next group when pa is used up */
3609 if (pa->pa_linear)
3610 grp_blk--;
3611
3612 ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
3608 3613
3609 /* 3614 /*
3610 * possible race: 3615 * possible race:
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6b74d09adbe5..de0004fe6e00 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -202,9 +202,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
202 sector_t blocknr; 202 sector_t blocknr;
203 203
204 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ 204 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
205 mutex_lock(&mapping->host->i_mutex); 205 down_read(&mapping->host->i_alloc_sem);
206 blocknr = generic_block_bmap(mapping, block, fat_get_block); 206 blocknr = generic_block_bmap(mapping, block, fat_get_block);
207 mutex_unlock(&mapping->host->i_mutex); 207 up_read(&mapping->host->i_alloc_sem);
208 208
209 return blocknr; 209 return blocknr;
210} 210}
diff --git a/fs/file_table.c b/fs/file_table.c
index bbeeac6efa1a..da806aceae3f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/ima.h>
16#include <linux/eventpoll.h> 17#include <linux/eventpoll.h>
17#include <linux/rcupdate.h> 18#include <linux/rcupdate.h>
18#include <linux/mount.h> 19#include <linux/mount.h>
@@ -279,6 +280,7 @@ void __fput(struct file *file)
279 if (file->f_op && file->f_op->release) 280 if (file->f_op && file->f_op->release)
280 file->f_op->release(inode, file); 281 file->f_op->release(inode, file);
281 security_file_free(file); 282 security_file_free(file);
283 ima_file_free(file);
282 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 284 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
283 cdev_put(inode->i_cdev); 285 cdev_put(inode->i_cdev);
284 fops_put(file->f_op); 286 fops_put(file->f_op);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e5eaa62fd17f..e3fe9918faaf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -274,6 +274,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
274 int ret; 274 int ret;
275 275
276 BUG_ON(inode->i_state & I_SYNC); 276 BUG_ON(inode->i_state & I_SYNC);
277 WARN_ON(inode->i_state & I_NEW);
277 278
278 /* Set I_SYNC, reset I_DIRTY */ 279 /* Set I_SYNC, reset I_DIRTY */
279 dirty = inode->i_state & I_DIRTY; 280 dirty = inode->i_state & I_DIRTY;
@@ -298,6 +299,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
298 } 299 }
299 300
300 spin_lock(&inode_lock); 301 spin_lock(&inode_lock);
302 WARN_ON(inode->i_state & I_NEW);
301 inode->i_state &= ~I_SYNC; 303 inode->i_state &= ~I_SYNC;
302 if (!(inode->i_state & I_FREEING)) { 304 if (!(inode->i_state & I_FREEING)) {
303 if (!(inode->i_state & I_DIRTY) && 305 if (!(inode->i_state & I_DIRTY) &&
@@ -470,6 +472,11 @@ void generic_sync_sb_inodes(struct super_block *sb,
470 break; 472 break;
471 } 473 }
472 474
475 if (inode->i_state & I_NEW) {
476 requeue_io(inode);
477 continue;
478 }
479
473 if (wbc->nonblocking && bdi_write_congested(bdi)) { 480 if (wbc->nonblocking && bdi_write_congested(bdi)) {
474 wbc->encountered_congestion = 1; 481 wbc->encountered_congestion = 1;
475 if (!sb_is_blkdev_sb(sb)) 482 if (!sb_is_blkdev_sb(sb))
@@ -531,7 +538,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
531 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 538 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
532 struct address_space *mapping; 539 struct address_space *mapping;
533 540
534 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 541 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
535 continue; 542 continue;
536 mapping = inode->i_mapping; 543 mapping = inode->i_mapping;
537 if (mapping->nrpages == 0) 544 if (mapping->nrpages == 0)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index e563a6449811..3a981b7f64ca 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,10 @@
1config GFS2_FS 1config GFS2_FS
2 tristate "GFS2 file system support" 2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL && (64BIT || LBD) 3 depends on EXPERIMENTAL && (64BIT || LBD)
4 select DLM if GFS2_FS_LOCKING_DLM
5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
6 select SYSFS if GFS2_FS_LOCKING_DLM
7 select IP_SCTP if DLM_SCTP
4 select FS_POSIX_ACL 8 select FS_POSIX_ACL
5 select CRC32 9 select CRC32
6 help 10 help
@@ -18,17 +22,16 @@ config GFS2_FS
18 the locking module below. Documentation and utilities for GFS2 can 22 the locking module below. Documentation and utilities for GFS2 can
19 be found here: http://sources.redhat.com/cluster 23 be found here: http://sources.redhat.com/cluster
20 24
21 The "nolock" lock module is now built in to GFS2 by default. 25 The "nolock" lock module is now built in to GFS2 by default. If
26 you want to use the DLM, be sure to enable HOTPLUG and IPv4/6
27 networking.
22 28
23config GFS2_FS_LOCKING_DLM 29config GFS2_FS_LOCKING_DLM
24 tristate "GFS2 DLM locking module" 30 bool "GFS2 DLM locking"
25 depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n) 31 depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
26 select IP_SCTP if DLM_SCTP
27 select CONFIGFS_FS
28 select DLM
29 help 32 help
30 Multiple node locking module for GFS2 33 Multiple node locking module for GFS2
31 34
32 Most users of GFS2 will require this module. It provides the locking 35 Most users of GFS2 will require this. It provides the locking
33 interface between GFS2 and the DLM, which is required to use GFS2 36 interface between GFS2 and the DLM, which is required to use GFS2
34 in a cluster environment. 37 in a cluster environment.
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index c1b4ec6a9650..a851ea4bdf70 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o 1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ 2gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o log.o lops.o locking.o main.o meta_io.o \ 3 glops.o inode.o log.o lops.o main.o meta_io.o \
4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ 4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o quota.o \ 5 ops_fstype.o ops_inode.o ops_super.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o 6 recovery.o rgrp.o super.o sys.o trans.o util.o
7 7
8obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ 8gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
9 9
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index e335dceb6a4f..43764f4fa763 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -15,7 +15,6 @@
15#include <linux/posix_acl.h> 15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h> 16#include <linux/posix_acl_xattr.h>
17#include <linux/gfs2_ondisk.h> 17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
21#include "incore.h" 20#include "incore.h"
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 11ffc56f1f81..3a5d3f883e10 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index b7c8e5c70791..aef4d0c06748 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -60,7 +60,6 @@
60#include <linux/gfs2_ondisk.h> 60#include <linux/gfs2_ondisk.h>
61#include <linux/crc32.h> 61#include <linux/crc32.h>
62#include <linux/vmalloc.h> 62#include <linux/vmalloc.h>
63#include <linux/lm_interface.h>
64 63
65#include "gfs2.h" 64#include "gfs2.h"
66#include "incore.h" 65#include "incore.h"
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index f114ba2b3557..dee9b03e5b37 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -14,7 +14,6 @@
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/xattr.h> 15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h> 17#include <asm/uaccess.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 0d1c76d906ae..899763aed217 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/xattr.h> 14#include <linux/xattr.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17#include <asm/uaccess.h> 16#include <asm/uaccess.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6b983aef785d..3984e47d1d33 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -10,7 +10,6 @@
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
15#include <linux/delay.h> 14#include <linux/delay.h>
16#include <linux/sort.h> 15#include <linux/sort.h>
@@ -18,7 +17,6 @@
18#include <linux/kallsyms.h> 17#include <linux/kallsyms.h>
19#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
20#include <linux/list.h> 19#include <linux/list.h>
21#include <linux/lm_interface.h>
22#include <linux/wait.h> 20#include <linux/wait.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/rwsem.h> 22#include <linux/rwsem.h>
@@ -155,13 +153,10 @@ static void glock_free(struct gfs2_glock *gl)
155 struct gfs2_sbd *sdp = gl->gl_sbd; 153 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct inode *aspace = gl->gl_aspace; 154 struct inode *aspace = gl->gl_aspace;
157 155
158 if (sdp->sd_lockstruct.ls_ops->lm_put_lock)
159 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
160
161 if (aspace) 156 if (aspace)
162 gfs2_aspace_put(aspace); 157 gfs2_aspace_put(aspace);
163 158
164 kmem_cache_free(gfs2_glock_cachep, gl); 159 sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
165} 160}
166 161
167/** 162/**
@@ -172,6 +167,7 @@ static void glock_free(struct gfs2_glock *gl)
172 167
173static void gfs2_glock_hold(struct gfs2_glock *gl) 168static void gfs2_glock_hold(struct gfs2_glock *gl)
174{ 169{
170 GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
175 atomic_inc(&gl->gl_ref); 171 atomic_inc(&gl->gl_ref);
176} 172}
177 173
@@ -211,17 +207,15 @@ int gfs2_glock_put(struct gfs2_glock *gl)
211 atomic_dec(&lru_count); 207 atomic_dec(&lru_count);
212 } 208 }
213 spin_unlock(&lru_lock); 209 spin_unlock(&lru_lock);
214 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
215 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_lru));
216 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 210 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
217 glock_free(gl); 211 glock_free(gl);
218 rv = 1; 212 rv = 1;
219 goto out; 213 goto out;
220 } 214 }
221 write_unlock(gl_lock_addr(gl->gl_hash));
222 /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */ 215 /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */
223 if (atomic_read(&gl->gl_ref) == 2) 216 if (atomic_read(&gl->gl_ref) == 2)
224 gfs2_glock_schedule_for_reclaim(gl); 217 gfs2_glock_schedule_for_reclaim(gl);
218 write_unlock(gl_lock_addr(gl->gl_hash));
225out: 219out:
226 return rv; 220 return rv;
227} 221}
@@ -256,27 +250,6 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
256} 250}
257 251
258/** 252/**
259 * gfs2_glock_find() - Find glock by lock number
260 * @sdp: The GFS2 superblock
261 * @name: The lock name
262 *
263 * Returns: NULL, or the struct gfs2_glock with the requested number
264 */
265
266static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
267 const struct lm_lockname *name)
268{
269 unsigned int hash = gl_hash(sdp, name);
270 struct gfs2_glock *gl;
271
272 read_lock(gl_lock_addr(hash));
273 gl = search_bucket(hash, sdp, name);
274 read_unlock(gl_lock_addr(hash));
275
276 return gl;
277}
278
279/**
280 * may_grant - check if it's ok to grant a new lock 253 * may_grant - check if it's ok to grant a new lock
281 * @gl: The glock 254 * @gl: The glock
282 * @gh: The lock request which we wish to grant 255 * @gh: The lock request which we wish to grant
@@ -523,7 +496,7 @@ out_locked:
523} 496}
524 497
525static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, 498static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
526 unsigned int cur_state, unsigned int req_state, 499 unsigned int req_state,
527 unsigned int flags) 500 unsigned int flags)
528{ 501{
529 int ret = LM_OUT_ERROR; 502 int ret = LM_OUT_ERROR;
@@ -532,7 +505,7 @@ static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
532 return req_state == LM_ST_UNLOCKED ? 0 : req_state; 505 return req_state == LM_ST_UNLOCKED ? 0 : req_state;
533 506
534 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 507 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
535 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, 508 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
536 req_state, flags); 509 req_state, flags);
537 return ret; 510 return ret;
538} 511}
@@ -575,7 +548,7 @@ __acquires(&gl->gl_spin)
575 gl->gl_state == LM_ST_DEFERRED) && 548 gl->gl_state == LM_ST_DEFERRED) &&
576 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 549 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
577 lck_flags |= LM_FLAG_TRY_1CB; 550 lck_flags |= LM_FLAG_TRY_1CB;
578 ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags); 551 ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
579 552
580 if (!(ret & LM_OUT_ASYNC)) { 553 if (!(ret & LM_OUT_ASYNC)) {
581 finish_xmote(gl, ret); 554 finish_xmote(gl, ret);
@@ -624,10 +597,11 @@ __acquires(&gl->gl_spin)
624 597
625 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); 598 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
626 599
600 down_read(&gfs2_umount_flush_sem);
627 if (test_bit(GLF_DEMOTE, &gl->gl_flags) && 601 if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
628 gl->gl_demote_state != gl->gl_state) { 602 gl->gl_demote_state != gl->gl_state) {
629 if (find_first_holder(gl)) 603 if (find_first_holder(gl))
630 goto out; 604 goto out_unlock;
631 if (nonblock) 605 if (nonblock)
632 goto out_sched; 606 goto out_sched;
633 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); 607 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
@@ -638,23 +612,26 @@ __acquires(&gl->gl_spin)
638 gfs2_demote_wake(gl); 612 gfs2_demote_wake(gl);
639 ret = do_promote(gl); 613 ret = do_promote(gl);
640 if (ret == 0) 614 if (ret == 0)
641 goto out; 615 goto out_unlock;
642 if (ret == 2) 616 if (ret == 2)
643 return; 617 goto out_sem;
644 gh = find_first_waiter(gl); 618 gh = find_first_waiter(gl);
645 gl->gl_target = gh->gh_state; 619 gl->gl_target = gh->gh_state;
646 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 620 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
647 do_error(gl, 0); /* Fail queued try locks */ 621 do_error(gl, 0); /* Fail queued try locks */
648 } 622 }
649 do_xmote(gl, gh, gl->gl_target); 623 do_xmote(gl, gh, gl->gl_target);
624out_sem:
625 up_read(&gfs2_umount_flush_sem);
650 return; 626 return;
651 627
652out_sched: 628out_sched:
653 gfs2_glock_hold(gl); 629 gfs2_glock_hold(gl);
654 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 630 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
655 gfs2_glock_put(gl); 631 gfs2_glock_put(gl);
656out: 632out_unlock:
657 clear_bit(GLF_LOCK, &gl->gl_flags); 633 clear_bit(GLF_LOCK, &gl->gl_flags);
634 goto out_sem;
658} 635}
659 636
660static void glock_work_func(struct work_struct *work) 637static void glock_work_func(struct work_struct *work)
@@ -681,18 +658,6 @@ static void glock_work_func(struct work_struct *work)
681 gfs2_glock_put(gl); 658 gfs2_glock_put(gl);
682} 659}
683 660
684static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
685 void **lockp)
686{
687 int error = -EIO;
688 if (!sdp->sd_lockstruct.ls_ops->lm_get_lock)
689 return 0;
690 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
691 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
692 sdp->sd_lockstruct.ls_lockspace, name, lockp);
693 return error;
694}
695
696/** 661/**
697 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 662 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
698 * @sdp: The GFS2 superblock 663 * @sdp: The GFS2 superblock
@@ -719,10 +684,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
719 gl = search_bucket(hash, sdp, &name); 684 gl = search_bucket(hash, sdp, &name);
720 read_unlock(gl_lock_addr(hash)); 685 read_unlock(gl_lock_addr(hash));
721 686
722 if (gl || !create) { 687 *glp = gl;
723 *glp = gl; 688 if (gl)
724 return 0; 689 return 0;
725 } 690 if (!create)
691 return -ENOENT;
726 692
727 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); 693 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
728 if (!gl) 694 if (!gl)
@@ -736,7 +702,9 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
736 gl->gl_demote_state = LM_ST_EXCLUSIVE; 702 gl->gl_demote_state = LM_ST_EXCLUSIVE;
737 gl->gl_hash = hash; 703 gl->gl_hash = hash;
738 gl->gl_ops = glops; 704 gl->gl_ops = glops;
739 gl->gl_stamp = jiffies; 705 snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number);
706 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
707 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
740 gl->gl_tchange = jiffies; 708 gl->gl_tchange = jiffies;
741 gl->gl_object = NULL; 709 gl->gl_object = NULL;
742 gl->gl_sbd = sdp; 710 gl->gl_sbd = sdp;
@@ -753,10 +721,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
753 } 721 }
754 } 722 }
755 723
756 error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
757 if (error)
758 goto fail_aspace;
759
760 write_lock(gl_lock_addr(hash)); 724 write_lock(gl_lock_addr(hash));
761 tmp = search_bucket(hash, sdp, &name); 725 tmp = search_bucket(hash, sdp, &name);
762 if (tmp) { 726 if (tmp) {
@@ -772,9 +736,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
772 736
773 return 0; 737 return 0;
774 738
775fail_aspace:
776 if (gl->gl_aspace)
777 gfs2_aspace_put(gl->gl_aspace);
778fail: 739fail:
779 kmem_cache_free(gfs2_glock_cachep, gl); 740 kmem_cache_free(gfs2_glock_cachep, gl);
780 return error; 741 return error;
@@ -966,7 +927,7 @@ do_cancel:
966 if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { 927 if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
967 spin_unlock(&gl->gl_spin); 928 spin_unlock(&gl->gl_spin);
968 if (sdp->sd_lockstruct.ls_ops->lm_cancel) 929 if (sdp->sd_lockstruct.ls_ops->lm_cancel)
969 sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); 930 sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
970 spin_lock(&gl->gl_spin); 931 spin_lock(&gl->gl_spin);
971 } 932 }
972 return; 933 return;
@@ -1051,7 +1012,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1051 spin_lock(&gl->gl_spin); 1012 spin_lock(&gl->gl_spin);
1052 clear_bit(GLF_LOCK, &gl->gl_flags); 1013 clear_bit(GLF_LOCK, &gl->gl_flags);
1053 } 1014 }
1054 gl->gl_stamp = jiffies;
1055 if (list_empty(&gl->gl_holders) && 1015 if (list_empty(&gl->gl_holders) &&
1056 !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1016 !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1057 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1017 !test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1240,70 +1200,13 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1240 gfs2_glock_dq_uninit(&ghs[x]); 1200 gfs2_glock_dq_uninit(&ghs[x]);
1241} 1201}
1242 1202
1243static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) 1203void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1244{
1245 int error = -EIO;
1246 if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb)
1247 return 0;
1248 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1249 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
1250 return error;
1251}
1252
1253/**
1254 * gfs2_lvb_hold - attach a LVB from a glock
1255 * @gl: The glock in question
1256 *
1257 */
1258
1259int gfs2_lvb_hold(struct gfs2_glock *gl)
1260{
1261 int error;
1262
1263 if (!atomic_read(&gl->gl_lvb_count)) {
1264 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1265 if (error)
1266 return error;
1267 gfs2_glock_hold(gl);
1268 }
1269 atomic_inc(&gl->gl_lvb_count);
1270
1271 return 0;
1272}
1273
1274/**
1275 * gfs2_lvb_unhold - detach a LVB from a glock
1276 * @gl: The glock in question
1277 *
1278 */
1279
1280void gfs2_lvb_unhold(struct gfs2_glock *gl)
1281{
1282 struct gfs2_sbd *sdp = gl->gl_sbd;
1283
1284 gfs2_glock_hold(gl);
1285 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1286 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1287 if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb)
1288 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb);
1289 gl->gl_lvb = NULL;
1290 gfs2_glock_put(gl);
1291 }
1292 gfs2_glock_put(gl);
1293}
1294
1295static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1296 unsigned int state)
1297{ 1204{
1298 struct gfs2_glock *gl;
1299 unsigned long delay = 0; 1205 unsigned long delay = 0;
1300 unsigned long holdtime; 1206 unsigned long holdtime;
1301 unsigned long now = jiffies; 1207 unsigned long now = jiffies;
1302 1208
1303 gl = gfs2_glock_find(sdp, name); 1209 gfs2_glock_hold(gl);
1304 if (!gl)
1305 return;
1306
1307 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 1210 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
1308 if (time_before(now, holdtime)) 1211 if (time_before(now, holdtime))
1309 delay = holdtime - now; 1212 delay = holdtime - now;
@@ -1317,74 +1220,33 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1317 gfs2_glock_put(gl); 1220 gfs2_glock_put(gl);
1318} 1221}
1319 1222
1320static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
1321{
1322 struct gfs2_jdesc *jd;
1323
1324 spin_lock(&sdp->sd_jindex_spin);
1325 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
1326 if (jd->jd_jid != jid)
1327 continue;
1328 jd->jd_dirty = 1;
1329 break;
1330 }
1331 spin_unlock(&sdp->sd_jindex_spin);
1332}
1333
1334/** 1223/**
1335 * gfs2_glock_cb - Callback used by locking module 1224 * gfs2_glock_complete - Callback used by locking
1336 * @sdp: Pointer to the superblock 1225 * @gl: Pointer to the glock
1337 * @type: Type of callback 1226 * @ret: The return value from the dlm
1338 * @data: Type dependent data pointer
1339 * 1227 *
1340 * Called by the locking module when it wants to tell us something.
1341 * Either we need to drop a lock, one of our ASYNC requests completed, or
1342 * a journal from another client needs to be recovered.
1343 */ 1228 */
1344 1229
1345void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) 1230void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1346{ 1231{
1347 struct gfs2_sbd *sdp = cb_data; 1232 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
1348 1233 gl->gl_reply = ret;
1349 switch (type) { 1234 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
1350 case LM_CB_NEED_E: 1235 struct gfs2_holder *gh;
1351 blocking_cb(sdp, data, LM_ST_UNLOCKED); 1236 spin_lock(&gl->gl_spin);
1352 return; 1237 gh = find_first_waiter(gl);
1353 1238 if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
1354 case LM_CB_NEED_D: 1239 (gl->gl_target != LM_ST_UNLOCKED)) ||
1355 blocking_cb(sdp, data, LM_ST_DEFERRED); 1240 ((ret & ~LM_OUT_ST_MASK) != 0))
1356 return; 1241 set_bit(GLF_FROZEN, &gl->gl_flags);
1357 1242 spin_unlock(&gl->gl_spin);
1358 case LM_CB_NEED_S: 1243 if (test_bit(GLF_FROZEN, &gl->gl_flags))
1359 blocking_cb(sdp, data, LM_ST_SHARED);
1360 return;
1361
1362 case LM_CB_ASYNC: {
1363 struct lm_async_cb *async = data;
1364 struct gfs2_glock *gl;
1365
1366 down_read(&gfs2_umount_flush_sem);
1367 gl = gfs2_glock_find(sdp, &async->lc_name);
1368 if (gfs2_assert_warn(sdp, gl))
1369 return; 1244 return;
1370 gl->gl_reply = async->lc_ret;
1371 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1372 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1373 gfs2_glock_put(gl);
1374 up_read(&gfs2_umount_flush_sem);
1375 return;
1376 }
1377
1378 case LM_CB_NEED_RECOVERY:
1379 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1380 if (sdp->sd_recoverd_process)
1381 wake_up_process(sdp->sd_recoverd_process);
1382 return;
1383
1384 default:
1385 gfs2_assert_warn(sdp, 0);
1386 return;
1387 } 1245 }
1246 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1247 gfs2_glock_hold(gl);
1248 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1249 gfs2_glock_put(gl);
1388} 1250}
1389 1251
1390/** 1252/**
@@ -1515,6 +1377,25 @@ out:
1515 return has_entries; 1377 return has_entries;
1516} 1378}
1517 1379
1380
1381/**
1382 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
1383 * @gl: The glock to thaw
1384 *
1385 * N.B. When we freeze a glock, we leave a ref to the glock outstanding,
1386 * so this has to result in the ref count being dropped by one.
1387 */
1388
1389static void thaw_glock(struct gfs2_glock *gl)
1390{
1391 if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
1392 return;
1393 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1394 gfs2_glock_hold(gl);
1395 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1396 gfs2_glock_put(gl);
1397}
1398
1518/** 1399/**
1519 * clear_glock - look at a glock and see if we can free it from glock cache 1400 * clear_glock - look at a glock and see if we can free it from glock cache
1520 * @gl: the glock to look at 1401 * @gl: the glock to look at
@@ -1540,6 +1421,20 @@ static void clear_glock(struct gfs2_glock *gl)
1540} 1421}
1541 1422
1542/** 1423/**
1424 * gfs2_glock_thaw - Thaw any frozen glocks
1425 * @sdp: The super block
1426 *
1427 */
1428
1429void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1430{
1431 unsigned x;
1432
1433 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1434 examine_bucket(thaw_glock, sdp, x);
1435}
1436
1437/**
1543 * gfs2_gl_hash_clear - Empty out the glock hash table 1438 * gfs2_gl_hash_clear - Empty out the glock hash table
1544 * @sdp: the filesystem 1439 * @sdp: the filesystem
1545 * @wait: wait until it's all gone 1440 * @wait: wait until it's all gone
@@ -1619,7 +1514,7 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
1619 if (flags & LM_FLAG_NOEXP) 1514 if (flags & LM_FLAG_NOEXP)
1620 *p++ = 'e'; 1515 *p++ = 'e';
1621 if (flags & LM_FLAG_ANY) 1516 if (flags & LM_FLAG_ANY)
1622 *p++ = 'a'; 1517 *p++ = 'A';
1623 if (flags & LM_FLAG_PRIORITY) 1518 if (flags & LM_FLAG_PRIORITY)
1624 *p++ = 'p'; 1519 *p++ = 'p';
1625 if (flags & GL_ASYNC) 1520 if (flags & GL_ASYNC)
@@ -1683,6 +1578,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
1683 *p++ = 'i'; 1578 *p++ = 'i';
1684 if (test_bit(GLF_REPLY_PENDING, gflags)) 1579 if (test_bit(GLF_REPLY_PENDING, gflags))
1685 *p++ = 'r'; 1580 *p++ = 'r';
1581 if (test_bit(GLF_INITIAL, gflags))
1582 *p++ = 'I';
1583 if (test_bit(GLF_FROZEN, gflags))
1584 *p++ = 'F';
1686 *p = 0; 1585 *p = 0;
1687 return buf; 1586 return buf;
1688} 1587}
@@ -1717,14 +1616,13 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1717 dtime *= 1000000/HZ; /* demote time in uSec */ 1616 dtime *= 1000000/HZ; /* demote time in uSec */
1718 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1617 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1719 dtime = 0; 1618 dtime = 0;
1720 gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n", 1619 gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu a:%d r:%d\n",
1721 state2str(gl->gl_state), 1620 state2str(gl->gl_state),
1722 gl->gl_name.ln_type, 1621 gl->gl_name.ln_type,
1723 (unsigned long long)gl->gl_name.ln_number, 1622 (unsigned long long)gl->gl_name.ln_number,
1724 gflags2str(gflags_buf, &gl->gl_flags), 1623 gflags2str(gflags_buf, &gl->gl_flags),
1725 state2str(gl->gl_target), 1624 state2str(gl->gl_target),
1726 state2str(gl->gl_demote_state), dtime, 1625 state2str(gl->gl_demote_state), dtime,
1727 atomic_read(&gl->gl_lvb_count),
1728 atomic_read(&gl->gl_ail_count), 1626 atomic_read(&gl->gl_ail_count),
1729 atomic_read(&gl->gl_ref)); 1627 atomic_read(&gl->gl_ref));
1730 1628
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 543ec7ecfbda..a602a28f6f08 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -11,15 +11,130 @@
11#define __GLOCK_DOT_H__ 11#define __GLOCK_DOT_H__
12 12
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/parser.h>
14#include "incore.h" 15#include "incore.h"
15 16
16/* Flags for lock requests; used in gfs2_holder gh_flag field. 17/* Options for hostdata parser */
17 From lm_interface.h: 18
19enum {
20 Opt_jid,
21 Opt_id,
22 Opt_first,
23 Opt_nodir,
24 Opt_err,
25};
26
27/*
28 * lm_lockname types
29 */
30
31#define LM_TYPE_RESERVED 0x00
32#define LM_TYPE_NONDISK 0x01
33#define LM_TYPE_INODE 0x02
34#define LM_TYPE_RGRP 0x03
35#define LM_TYPE_META 0x04
36#define LM_TYPE_IOPEN 0x05
37#define LM_TYPE_FLOCK 0x06
38#define LM_TYPE_PLOCK 0x07
39#define LM_TYPE_QUOTA 0x08
40#define LM_TYPE_JOURNAL 0x09
41
42/*
43 * lm_lock() states
44 *
45 * SHARED is compatible with SHARED, not with DEFERRED or EX.
46 * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
47 */
48
49#define LM_ST_UNLOCKED 0
50#define LM_ST_EXCLUSIVE 1
51#define LM_ST_DEFERRED 2
52#define LM_ST_SHARED 3
53
54/*
55 * lm_lock() flags
56 *
57 * LM_FLAG_TRY
58 * Don't wait to acquire the lock if it can't be granted immediately.
59 *
60 * LM_FLAG_TRY_1CB
61 * Send one blocking callback if TRY is set and the lock is not granted.
62 *
63 * LM_FLAG_NOEXP
64 * GFS sets this flag on lock requests it makes while doing journal recovery.
65 * These special requests should not be blocked due to the recovery like
66 * ordinary locks would be.
67 *
68 * LM_FLAG_ANY
69 * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
70 * also be granted in SHARED. The preferred state is whichever is compatible
71 * with other granted locks, or the specified state if no other locks exist.
72 *
73 * LM_FLAG_PRIORITY
74 * Override fairness considerations. Suppose a lock is held in a shared state
75 * and there is a pending request for the deferred state. A shared lock
76 * request with the priority flag would be allowed to bypass the deferred
77 * request and directly join the other shared lock. A shared lock request
78 * without the priority flag might be forced to wait until the deferred
79 * request had acquired and released the lock.
80 */
81
18#define LM_FLAG_TRY 0x00000001 82#define LM_FLAG_TRY 0x00000001
19#define LM_FLAG_TRY_1CB 0x00000002 83#define LM_FLAG_TRY_1CB 0x00000002
20#define LM_FLAG_NOEXP 0x00000004 84#define LM_FLAG_NOEXP 0x00000004
21#define LM_FLAG_ANY 0x00000008 85#define LM_FLAG_ANY 0x00000008
22#define LM_FLAG_PRIORITY 0x00000010 */ 86#define LM_FLAG_PRIORITY 0x00000010
87#define GL_ASYNC 0x00000040
88#define GL_EXACT 0x00000080
89#define GL_SKIP 0x00000100
90#define GL_ATIME 0x00000200
91#define GL_NOCACHE 0x00000400
92
93/*
94 * lm_lock() and lm_async_cb return flags
95 *
96 * LM_OUT_ST_MASK
97 * Masks the lower two bits of lock state in the returned value.
98 *
99 * LM_OUT_CANCELED
100 * The lock request was canceled.
101 *
102 * LM_OUT_ASYNC
103 * The result of the request will be returned in an LM_CB_ASYNC callback.
104 *
105 */
106
107#define LM_OUT_ST_MASK 0x00000003
108#define LM_OUT_CANCELED 0x00000008
109#define LM_OUT_ASYNC 0x00000080
110#define LM_OUT_ERROR 0x00000100
111
112/*
113 * lm_recovery_done() messages
114 */
115
116#define LM_RD_GAVEUP 308
117#define LM_RD_SUCCESS 309
118
119#define GLR_TRYFAILED 13
120
121struct lm_lockops {
122 const char *lm_proto_name;
123 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
124 void (*lm_unmount) (struct gfs2_sbd *sdp);
125 void (*lm_withdraw) (struct gfs2_sbd *sdp);
126 void (*lm_put_lock) (struct kmem_cache *cachep, void *gl);
127 unsigned int (*lm_lock) (struct gfs2_glock *gl,
128 unsigned int req_state, unsigned int flags);
129 void (*lm_cancel) (struct gfs2_glock *gl);
130 const match_table_t *lm_tokens;
131};
132
133#define LM_FLAG_TRY 0x00000001
134#define LM_FLAG_TRY_1CB 0x00000002
135#define LM_FLAG_NOEXP 0x00000004
136#define LM_FLAG_ANY 0x00000008
137#define LM_FLAG_PRIORITY 0x00000010
23 138
24#define GL_ASYNC 0x00000040 139#define GL_ASYNC 0x00000040
25#define GL_EXACT 0x00000080 140#define GL_EXACT 0x00000080
@@ -128,10 +243,12 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
128int gfs2_lvb_hold(struct gfs2_glock *gl); 243int gfs2_lvb_hold(struct gfs2_glock *gl);
129void gfs2_lvb_unhold(struct gfs2_glock *gl); 244void gfs2_lvb_unhold(struct gfs2_glock *gl);
130 245
131void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); 246void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
247void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
132void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 248void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
133void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 249void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
134void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 250void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
251void gfs2_glock_thaw(struct gfs2_sbd *sdp);
135 252
136int __init gfs2_glock_init(void); 253int __init gfs2_glock_init(void);
137void gfs2_glock_exit(void); 254void gfs2_glock_exit(void);
@@ -141,4 +258,6 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
141int gfs2_register_debugfs(void); 258int gfs2_register_debugfs(void);
142void gfs2_unregister_debugfs(void); 259void gfs2_unregister_debugfs(void);
143 260
261extern const struct lm_lockops gfs2_dlm_ops;
262
144#endif /* __GLOCK_DOT_H__ */ 263#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8522d3aa64fc..bf23a62aa925 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -12,7 +12,6 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/lm_interface.h>
16#include <linux/bio.h> 15#include <linux/bio.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
@@ -38,20 +37,25 @@
38static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 37static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
39{ 38{
40 struct gfs2_sbd *sdp = gl->gl_sbd; 39 struct gfs2_sbd *sdp = gl->gl_sbd;
41 unsigned int blocks;
42 struct list_head *head = &gl->gl_ail_list; 40 struct list_head *head = &gl->gl_ail_list;
43 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
44 struct buffer_head *bh; 42 struct buffer_head *bh;
45 int error; 43 struct gfs2_trans tr;
46 44
47 blocks = atomic_read(&gl->gl_ail_count); 45 memset(&tr, 0, sizeof(tr));
48 if (!blocks) 46 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
49 return;
50 47
51 error = gfs2_trans_begin(sdp, 0, blocks); 48 if (!tr.tr_revokes)
52 if (gfs2_assert_withdraw(sdp, !error))
53 return; 49 return;
54 50
51 /* A shortened, inline version of gfs2_trans_begin() */
52 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
53 tr.tr_ip = (unsigned long)__builtin_return_address(0);
54 INIT_LIST_HEAD(&tr.tr_list_buf);
55 gfs2_log_reserve(sdp, tr.tr_reserved);
56 BUG_ON(current->journal_info);
57 current->journal_info = &tr;
58
55 gfs2_log_lock(sdp); 59 gfs2_log_lock(sdp);
56 while (!list_empty(head)) { 60 while (!list_empty(head)) {
57 bd = list_entry(head->next, struct gfs2_bufdata, 61 bd = list_entry(head->next, struct gfs2_bufdata,
@@ -72,29 +76,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
72} 76}
73 77
74/** 78/**
75 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock 79 * rgrp_go_sync - sync out the metadata for this glock
76 * @gl: the glock
77 *
78 */
79
80static void gfs2_pte_inval(struct gfs2_glock *gl)
81{
82 struct gfs2_inode *ip;
83 struct inode *inode;
84
85 ip = gl->gl_object;
86 inode = &ip->i_inode;
87 if (!ip || !S_ISREG(inode->i_mode))
88 return;
89
90 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
91 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
92 set_bit(GLF_DIRTY, &gl->gl_flags);
93
94}
95
96/**
97 * meta_go_sync - sync out the metadata for this glock
98 * @gl: the glock 80 * @gl: the glock
99 * 81 *
100 * Called when demoting or unlocking an EX glock. We must flush 82 * Called when demoting or unlocking an EX glock. We must flush
@@ -102,36 +84,42 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
102 * not return to caller to demote/unlock the glock until I/O is complete. 84 * not return to caller to demote/unlock the glock until I/O is complete.
103 */ 85 */
104 86
105static void meta_go_sync(struct gfs2_glock *gl) 87static void rgrp_go_sync(struct gfs2_glock *gl)
106{ 88{
107 if (gl->gl_state != LM_ST_EXCLUSIVE) 89 struct address_space *metamapping = gl->gl_aspace->i_mapping;
90 int error;
91
92 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
108 return; 93 return;
94 BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
109 95
110 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { 96 gfs2_log_flush(gl->gl_sbd, gl);
111 gfs2_log_flush(gl->gl_sbd, gl); 97 filemap_fdatawrite(metamapping);
112 gfs2_meta_sync(gl); 98 error = filemap_fdatawait(metamapping);
113 gfs2_ail_empty_gl(gl); 99 mapping_set_error(metamapping, error);
114 } 100 gfs2_ail_empty_gl(gl);
115} 101}
116 102
117/** 103/**
118 * meta_go_inval - invalidate the metadata for this glock 104 * rgrp_go_inval - invalidate the metadata for this glock
119 * @gl: the glock 105 * @gl: the glock
120 * @flags: 106 * @flags:
121 * 107 *
108 * We never used LM_ST_DEFERRED with resource groups, so that we
109 * should always see the metadata flag set here.
110 *
122 */ 111 */
123 112
124static void meta_go_inval(struct gfs2_glock *gl, int flags) 113static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
125{ 114{
126 if (!(flags & DIO_METADATA)) 115 struct address_space *mapping = gl->gl_aspace->i_mapping;
127 return;
128 116
129 gfs2_meta_inval(gl); 117 BUG_ON(!(flags & DIO_METADATA));
130 if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex)) 118 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
131 gl->gl_sbd->sd_rindex_uptodate = 0; 119 truncate_inode_pages(mapping, 0);
132 else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) {
133 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
134 120
121 if (gl->gl_object) {
122 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
135 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 123 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
136 } 124 }
137} 125}
@@ -148,48 +136,54 @@ static void inode_go_sync(struct gfs2_glock *gl)
148 struct address_space *metamapping = gl->gl_aspace->i_mapping; 136 struct address_space *metamapping = gl->gl_aspace->i_mapping;
149 int error; 137 int error;
150 138
151 if (gl->gl_state != LM_ST_UNLOCKED)
152 gfs2_pte_inval(gl);
153 if (gl->gl_state != LM_ST_EXCLUSIVE)
154 return;
155
156 if (ip && !S_ISREG(ip->i_inode.i_mode)) 139 if (ip && !S_ISREG(ip->i_inode.i_mode))
157 ip = NULL; 140 ip = NULL;
141 if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
142 unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
143 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
144 return;
158 145
159 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 146 BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
160 gfs2_log_flush(gl->gl_sbd, gl); 147
161 filemap_fdatawrite(metamapping); 148 gfs2_log_flush(gl->gl_sbd, gl);
162 if (ip) { 149 filemap_fdatawrite(metamapping);
163 struct address_space *mapping = ip->i_inode.i_mapping; 150 if (ip) {
164 filemap_fdatawrite(mapping); 151 struct address_space *mapping = ip->i_inode.i_mapping;
165 error = filemap_fdatawait(mapping); 152 filemap_fdatawrite(mapping);
166 mapping_set_error(mapping, error); 153 error = filemap_fdatawait(mapping);
167 } 154 mapping_set_error(mapping, error);
168 error = filemap_fdatawait(metamapping);
169 mapping_set_error(metamapping, error);
170 clear_bit(GLF_DIRTY, &gl->gl_flags);
171 gfs2_ail_empty_gl(gl);
172 } 155 }
156 error = filemap_fdatawait(metamapping);
157 mapping_set_error(metamapping, error);
158 gfs2_ail_empty_gl(gl);
173} 159}
174 160
175/** 161/**
176 * inode_go_inval - prepare a inode glock to be released 162 * inode_go_inval - prepare a inode glock to be released
177 * @gl: the glock 163 * @gl: the glock
178 * @flags: 164 * @flags:
165 *
166 * Normally we invalidate everything, but if we are moving into
167 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
168 * can keep hold of the metadata, since it won't have changed.
179 * 169 *
180 */ 170 */
181 171
182static void inode_go_inval(struct gfs2_glock *gl, int flags) 172static void inode_go_inval(struct gfs2_glock *gl, int flags)
183{ 173{
184 struct gfs2_inode *ip = gl->gl_object; 174 struct gfs2_inode *ip = gl->gl_object;
185 int meta = (flags & DIO_METADATA);
186 175
187 if (meta) { 176 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
188 gfs2_meta_inval(gl); 177
178 if (flags & DIO_METADATA) {
179 struct address_space *mapping = gl->gl_aspace->i_mapping;
180 truncate_inode_pages(mapping, 0);
189 if (ip) 181 if (ip)
190 set_bit(GIF_INVALID, &ip->i_flags); 182 set_bit(GIF_INVALID, &ip->i_flags);
191 } 183 }
192 184
185 if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
186 gl->gl_sbd->sd_rindex_uptodate = 0;
193 if (ip && S_ISREG(ip->i_inode.i_mode)) 187 if (ip && S_ISREG(ip->i_inode.i_mode))
194 truncate_inode_pages(ip->i_inode.i_mapping, 0); 188 truncate_inode_pages(ip->i_inode.i_mapping, 0);
195} 189}
@@ -390,20 +384,7 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
390 return 0; 384 return 0;
391} 385}
392 386
393/**
394 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
395 * @gl: the glock
396 *
397 * Returns: 1 if it's ok
398 */
399
400static int quota_go_demote_ok(const struct gfs2_glock *gl)
401{
402 return !atomic_read(&gl->gl_lvb_count);
403}
404
405const struct gfs2_glock_operations gfs2_meta_glops = { 387const struct gfs2_glock_operations gfs2_meta_glops = {
406 .go_xmote_th = meta_go_sync,
407 .go_type = LM_TYPE_META, 388 .go_type = LM_TYPE_META,
408}; 389};
409 390
@@ -418,8 +399,8 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
418}; 399};
419 400
420const struct gfs2_glock_operations gfs2_rgrp_glops = { 401const struct gfs2_glock_operations gfs2_rgrp_glops = {
421 .go_xmote_th = meta_go_sync, 402 .go_xmote_th = rgrp_go_sync,
422 .go_inval = meta_go_inval, 403 .go_inval = rgrp_go_inval,
423 .go_demote_ok = rgrp_go_demote_ok, 404 .go_demote_ok = rgrp_go_demote_ok,
424 .go_lock = rgrp_go_lock, 405 .go_lock = rgrp_go_lock,
425 .go_unlock = rgrp_go_unlock, 406 .go_unlock = rgrp_go_unlock,
@@ -448,7 +429,6 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {
448}; 429};
449 430
450const struct gfs2_glock_operations gfs2_quota_glops = { 431const struct gfs2_glock_operations gfs2_quota_glops = {
451 .go_demote_ok = quota_go_demote_ok,
452 .go_type = LM_TYPE_QUOTA, 432 .go_type = LM_TYPE_QUOTA,
453}; 433};
454 434
@@ -456,3 +436,15 @@ const struct gfs2_glock_operations gfs2_journal_glops = {
456 .go_type = LM_TYPE_JOURNAL, 436 .go_type = LM_TYPE_JOURNAL,
457}; 437};
458 438
439const struct gfs2_glock_operations *gfs2_glops_list[] = {
440 [LM_TYPE_META] = &gfs2_meta_glops,
441 [LM_TYPE_INODE] = &gfs2_inode_glops,
442 [LM_TYPE_RGRP] = &gfs2_rgrp_glops,
443 [LM_TYPE_NONDISK] = &gfs2_trans_glops,
444 [LM_TYPE_IOPEN] = &gfs2_iopen_glops,
445 [LM_TYPE_FLOCK] = &gfs2_flock_glops,
446 [LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
447 [LM_TYPE_QUOTA] = &gfs2_quota_glops,
448 [LM_TYPE_JOURNAL] = &gfs2_journal_glops,
449};
450
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index a1d9b5b024e6..b3aa2e3210fd 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -21,5 +21,6 @@ extern const struct gfs2_glock_operations gfs2_flock_glops;
21extern const struct gfs2_glock_operations gfs2_nondisk_glops; 21extern const struct gfs2_glock_operations gfs2_nondisk_glops;
22extern const struct gfs2_glock_operations gfs2_quota_glops; 22extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops; 23extern const struct gfs2_glock_operations gfs2_journal_glops;
24extern const struct gfs2_glock_operations *gfs2_glops_list[];
24 25
25#endif /* __GLOPS_DOT_H__ */ 26#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 608849d00021..399d1b978049 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,6 +12,8 @@
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/workqueue.h> 14#include <linux/workqueue.h>
15#include <linux/dlm.h>
16#include <linux/buffer_head.h>
15 17
16#define DIO_WAIT 0x00000010 18#define DIO_WAIT 0x00000010
17#define DIO_METADATA 0x00000020 19#define DIO_METADATA 0x00000020
@@ -26,6 +28,7 @@ struct gfs2_trans;
26struct gfs2_ail; 28struct gfs2_ail;
27struct gfs2_jdesc; 29struct gfs2_jdesc;
28struct gfs2_sbd; 30struct gfs2_sbd;
31struct lm_lockops;
29 32
30typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret); 33typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
31 34
@@ -121,6 +124,28 @@ struct gfs2_bufdata {
121 struct list_head bd_ail_gl_list; 124 struct list_head bd_ail_gl_list;
122}; 125};
123 126
127/*
128 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
129 * prefix of lock_dlm_ gets awkward.
130 */
131
132#define GDLM_STRNAME_BYTES 25
133#define GDLM_LVB_SIZE 32
134
135enum {
136 DFL_BLOCK_LOCKS = 0,
137};
138
/* Name of a lock: a 64-bit number paired with a lock type. */
struct lm_lockname {
	u64 ln_number;
	unsigned int ln_type;
};

/*
 * True when two lock names have the same number and the same type.
 * Both arguments are pointers; each is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define lm_name_equal(name1, name2) \
	(((name1)->ln_number == (name2)->ln_number) && \
	((name1)->ln_type == (name2)->ln_type))
147
148
124struct gfs2_glock_operations { 149struct gfs2_glock_operations {
125 void (*go_xmote_th) (struct gfs2_glock *gl); 150 void (*go_xmote_th) (struct gfs2_glock *gl);
126 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); 151 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
@@ -162,6 +187,8 @@ enum {
162 GLF_LFLUSH = 7, 187 GLF_LFLUSH = 7,
163 GLF_INVALIDATE_IN_PROGRESS = 8, 188 GLF_INVALIDATE_IN_PROGRESS = 8,
164 GLF_REPLY_PENDING = 9, 189 GLF_REPLY_PENDING = 9,
190 GLF_INITIAL = 10,
191 GLF_FROZEN = 11,
165}; 192};
166 193
167struct gfs2_glock { 194struct gfs2_glock {
@@ -176,16 +203,15 @@ struct gfs2_glock {
176 unsigned int gl_target; 203 unsigned int gl_target;
177 unsigned int gl_reply; 204 unsigned int gl_reply;
178 unsigned int gl_hash; 205 unsigned int gl_hash;
206 unsigned int gl_req;
179 unsigned int gl_demote_state; /* state requested by remote node */ 207 unsigned int gl_demote_state; /* state requested by remote node */
180 unsigned long gl_demote_time; /* time of first demote request */ 208 unsigned long gl_demote_time; /* time of first demote request */
181 struct list_head gl_holders; 209 struct list_head gl_holders;
182 210
183 const struct gfs2_glock_operations *gl_ops; 211 const struct gfs2_glock_operations *gl_ops;
184 void *gl_lock; 212 char gl_strname[GDLM_STRNAME_BYTES];
185 char *gl_lvb; 213 struct dlm_lksb gl_lksb;
186 atomic_t gl_lvb_count; 214 char gl_lvb[32];
187
188 unsigned long gl_stamp;
189 unsigned long gl_tchange; 215 unsigned long gl_tchange;
190 void *gl_object; 216 void *gl_object;
191 217
@@ -283,7 +309,9 @@ enum {
283 309
284struct gfs2_quota_data { 310struct gfs2_quota_data {
285 struct list_head qd_list; 311 struct list_head qd_list;
286 unsigned int qd_count; 312 struct list_head qd_reclaim;
313
314 atomic_t qd_count;
287 315
288 u32 qd_id; 316 u32 qd_id;
289 unsigned long qd_flags; /* QDF_... */ 317 unsigned long qd_flags; /* QDF_... */
@@ -303,7 +331,6 @@ struct gfs2_quota_data {
303 331
304 u64 qd_sync_gen; 332 u64 qd_sync_gen;
305 unsigned long qd_last_warn; 333 unsigned long qd_last_warn;
306 unsigned long qd_last_touched;
307}; 334};
308 335
309struct gfs2_trans { 336struct gfs2_trans {
@@ -390,7 +417,7 @@ struct gfs2_args {
390 unsigned int ar_suiddir:1; /* suiddir support */ 417 unsigned int ar_suiddir:1; /* suiddir support */
391 unsigned int ar_data:2; /* ordered/writeback */ 418 unsigned int ar_data:2; /* ordered/writeback */
392 unsigned int ar_meta:1; /* mount metafs */ 419 unsigned int ar_meta:1; /* mount metafs */
393 unsigned int ar_num_glockd; /* Number of glockd threads */ 420 unsigned int ar_discard:1; /* discard requests */
394}; 421};
395 422
396struct gfs2_tune { 423struct gfs2_tune {
@@ -406,7 +433,6 @@ struct gfs2_tune {
406 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ 433 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
407 unsigned int gt_quota_scale_num; /* Numerator */ 434 unsigned int gt_quota_scale_num; /* Numerator */
408 unsigned int gt_quota_scale_den; /* Denominator */ 435 unsigned int gt_quota_scale_den; /* Denominator */
409 unsigned int gt_quota_cache_secs;
410 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ 436 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
411 unsigned int gt_new_files_jdata; 437 unsigned int gt_new_files_jdata;
412 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 438 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
@@ -445,6 +471,31 @@ struct gfs2_sb_host {
445 471
446 char sb_lockproto[GFS2_LOCKNAME_LEN]; 472 char sb_lockproto[GFS2_LOCKNAME_LEN];
447 char sb_locktable[GFS2_LOCKNAME_LEN]; 473 char sb_locktable[GFS2_LOCKNAME_LEN];
474 u8 sb_uuid[16];
475};
476
477/*
478 * lm_mount() return values
479 *
480 * ls_jid - the journal ID this node should use
481 * ls_first - this node is the first to mount the file system
482 * ls_dlm - DLM lockspace handle for this file system
483 * ls_ops - lock module's functions
484 */
485
486struct lm_lockstruct {
487 u32 ls_id;
488 unsigned int ls_jid;
489 unsigned int ls_first;
490 unsigned int ls_first_done;
491 unsigned int ls_nodir;
492 const struct lm_lockops *ls_ops;
493 unsigned long ls_flags;
494 dlm_lockspace_t *ls_dlm;
495
496 int ls_recover_jid;
497 int ls_recover_jid_done;
498 int ls_recover_jid_status;
448}; 499};
449 500
450struct gfs2_sbd { 501struct gfs2_sbd {
@@ -520,7 +571,6 @@ struct gfs2_sbd {
520 spinlock_t sd_jindex_spin; 571 spinlock_t sd_jindex_spin;
521 struct mutex sd_jindex_mutex; 572 struct mutex sd_jindex_mutex;
522 unsigned int sd_journals; 573 unsigned int sd_journals;
523 unsigned long sd_jindex_refresh_time;
524 574
525 struct gfs2_jdesc *sd_jdesc; 575 struct gfs2_jdesc *sd_jdesc;
526 struct gfs2_holder sd_journal_gh; 576 struct gfs2_holder sd_journal_gh;
@@ -540,7 +590,6 @@ struct gfs2_sbd {
540 590
541 struct list_head sd_quota_list; 591 struct list_head sd_quota_list;
542 atomic_t sd_quota_count; 592 atomic_t sd_quota_count;
543 spinlock_t sd_quota_spin;
544 struct mutex sd_quota_mutex; 593 struct mutex sd_quota_mutex;
545 wait_queue_head_t sd_quota_wait; 594 wait_queue_head_t sd_quota_wait;
546 struct list_head sd_trunc_list; 595 struct list_head sd_trunc_list;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3b87c188da41..7b277d449155 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -16,7 +16,6 @@
16#include <linux/sort.h> 16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h> 17#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h> 18#include <linux/crc32.h>
19#include <linux/lm_interface.h>
20#include <linux/security.h> 19#include <linux/security.h>
21#include <linux/time.h> 20#include <linux/time.h>
22 21
@@ -137,16 +136,16 @@ void gfs2_set_iop(struct inode *inode)
137 136
138 if (S_ISREG(mode)) { 137 if (S_ISREG(mode)) {
139 inode->i_op = &gfs2_file_iops; 138 inode->i_op = &gfs2_file_iops;
140 if (sdp->sd_args.ar_localflocks) 139 if (gfs2_localflocks(sdp))
141 inode->i_fop = &gfs2_file_fops_nolock; 140 inode->i_fop = gfs2_file_fops_nolock;
142 else 141 else
143 inode->i_fop = &gfs2_file_fops; 142 inode->i_fop = gfs2_file_fops;
144 } else if (S_ISDIR(mode)) { 143 } else if (S_ISDIR(mode)) {
145 inode->i_op = &gfs2_dir_iops; 144 inode->i_op = &gfs2_dir_iops;
146 if (sdp->sd_args.ar_localflocks) 145 if (gfs2_localflocks(sdp))
147 inode->i_fop = &gfs2_dir_fops_nolock; 146 inode->i_fop = gfs2_dir_fops_nolock;
148 else 147 else
149 inode->i_fop = &gfs2_dir_fops; 148 inode->i_fop = gfs2_dir_fops;
150 } else if (S_ISLNK(mode)) { 149 } else if (S_ISLNK(mode)) {
151 inode->i_op = &gfs2_symlink_iops; 150 inode->i_op = &gfs2_symlink_iops;
152 } else { 151 } else {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d5329364cdff..dca4fee3078b 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -101,12 +101,26 @@ void gfs2_dinode_print(const struct gfs2_inode *ip);
101extern const struct inode_operations gfs2_file_iops; 101extern const struct inode_operations gfs2_file_iops;
102extern const struct inode_operations gfs2_dir_iops; 102extern const struct inode_operations gfs2_dir_iops;
103extern const struct inode_operations gfs2_symlink_iops; 103extern const struct inode_operations gfs2_symlink_iops;
104extern const struct file_operations gfs2_file_fops; 104extern const struct file_operations *gfs2_file_fops_nolock;
105extern const struct file_operations gfs2_dir_fops; 105extern const struct file_operations *gfs2_dir_fops_nolock;
106extern const struct file_operations gfs2_file_fops_nolock;
107extern const struct file_operations gfs2_dir_fops_nolock;
108 106
109extern void gfs2_set_inode_flags(struct inode *inode); 107extern void gfs2_set_inode_flags(struct inode *inode);
108
/*
 * Cluster-aware file operations and the "local flocks" query.
 *
 * With CONFIG_GFS2_FS_LOCKING_DLM the two fops pointers are real objects
 * and gfs2_localflocks() reports the ar_localflocks mount argument.
 * Without it the fops names expand to NULL and gfs2_localflocks() is
 * always 1.
 */
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
extern const struct file_operations *gfs2_file_fops;
extern const struct file_operations *gfs2_dir_fops;
/* Returns the ar_localflocks mount argument of @sdp. */
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
	return sdp->sd_args.ar_localflocks;
}
#else /* Single node only */
#define gfs2_file_fops NULL
#define gfs2_dir_fops NULL
/* No DLM support built in: always returns 1. */
static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
{
	return 1;
}
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
110 124
111#endif /* __INODE_DOT_H__ */ 125#endif /* __INODE_DOT_H__ */
112 126
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
new file mode 100644
index 000000000000..46df988323bc
--- /dev/null
+++ b/fs/gfs2/lock_dlm.c
@@ -0,0 +1,241 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/fs.h>
11#include <linux/dlm.h>
12#include <linux/types.h>
13#include <linux/gfs2_ondisk.h>
14
15#include "incore.h"
16#include "glock.h"
17#include "util.h"
18
19
20static void gdlm_ast(void *arg)
21{
22 struct gfs2_glock *gl = arg;
23 unsigned ret = gl->gl_state;
24
25 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
26
27 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
28 memset(gl->gl_lvb, 0, GDLM_LVB_SIZE);
29
30 switch (gl->gl_lksb.sb_status) {
31 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
32 kmem_cache_free(gfs2_glock_cachep, gl);
33 return;
34 case -DLM_ECANCEL: /* Cancel while getting lock */
35 ret |= LM_OUT_CANCELED;
36 goto out;
37 case -EAGAIN: /* Try lock fails */
38 goto out;
39 case -EINVAL: /* Invalid */
40 case -ENOMEM: /* Out of memory */
41 ret |= LM_OUT_ERROR;
42 goto out;
43 case 0: /* Success */
44 break;
45 default: /* Something unexpected */
46 BUG();
47 }
48
49 ret = gl->gl_req;
50 if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
51 if (gl->gl_req == LM_ST_SHARED)
52 ret = LM_ST_DEFERRED;
53 else if (gl->gl_req == LM_ST_DEFERRED)
54 ret = LM_ST_SHARED;
55 else
56 BUG();
57 }
58
59 set_bit(GLF_INITIAL, &gl->gl_flags);
60 gfs2_glock_complete(gl, ret);
61 return;
62out:
63 if (!test_bit(GLF_INITIAL, &gl->gl_flags))
64 gl->gl_lksb.sb_lkid = 0;
65 gfs2_glock_complete(gl, ret);
66}
67
68static void gdlm_bast(void *arg, int mode)
69{
70 struct gfs2_glock *gl = arg;
71
72 switch (mode) {
73 case DLM_LOCK_EX:
74 gfs2_glock_cb(gl, LM_ST_UNLOCKED);
75 break;
76 case DLM_LOCK_CW:
77 gfs2_glock_cb(gl, LM_ST_DEFERRED);
78 break;
79 case DLM_LOCK_PR:
80 gfs2_glock_cb(gl, LM_ST_SHARED);
81 break;
82 default:
83 printk(KERN_ERR "unknown bast mode %d", mode);
84 BUG();
85 }
86}
87
88/* convert gfs lock-state to dlm lock-mode */
89
90static int make_mode(const unsigned int lmstate)
91{
92 switch (lmstate) {
93 case LM_ST_UNLOCKED:
94 return DLM_LOCK_NL;
95 case LM_ST_EXCLUSIVE:
96 return DLM_LOCK_EX;
97 case LM_ST_DEFERRED:
98 return DLM_LOCK_CW;
99 case LM_ST_SHARED:
100 return DLM_LOCK_PR;
101 }
102 printk(KERN_ERR "unknown LM state %d", lmstate);
103 BUG();
104 return -1;
105}
106
107static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
108 const int req)
109{
110 u32 lkf = 0;
111
112 if (gfs_flags & LM_FLAG_TRY)
113 lkf |= DLM_LKF_NOQUEUE;
114
115 if (gfs_flags & LM_FLAG_TRY_1CB) {
116 lkf |= DLM_LKF_NOQUEUE;
117 lkf |= DLM_LKF_NOQUEUEBAST;
118 }
119
120 if (gfs_flags & LM_FLAG_PRIORITY) {
121 lkf |= DLM_LKF_NOORDER;
122 lkf |= DLM_LKF_HEADQUE;
123 }
124
125 if (gfs_flags & LM_FLAG_ANY) {
126 if (req == DLM_LOCK_PR)
127 lkf |= DLM_LKF_ALTCW;
128 else if (req == DLM_LOCK_CW)
129 lkf |= DLM_LKF_ALTPR;
130 else
131 BUG();
132 }
133
134 if (lkid != 0)
135 lkf |= DLM_LKF_CONVERT;
136
137 lkf |= DLM_LKF_VALBLK;
138
139 return lkf;
140}
141
142static unsigned int gdlm_lock(struct gfs2_glock *gl,
143 unsigned int req_state, unsigned int flags)
144{
145 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
146 int error;
147 int req;
148 u32 lkf;
149
150 gl->gl_req = req_state;
151 req = make_mode(req_state);
152 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
153
154 /*
155 * Submit the actual lock request.
156 */
157
158 error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
159 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
160 if (error == -EAGAIN)
161 return 0;
162 if (error)
163 return LM_OUT_ERROR;
164 return LM_OUT_ASYNC;
165}
166
167static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
168{
169 struct gfs2_glock *gl = ptr;
170 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
171 int error;
172
173 if (gl->gl_lksb.sb_lkid == 0) {
174 kmem_cache_free(cachep, gl);
175 return;
176 }
177
178 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
179 NULL, gl);
180 if (error) {
181 printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n",
182 gl->gl_name.ln_type,
183 (unsigned long long)gl->gl_name.ln_number, error);
184 return;
185 }
186}
187
188static void gdlm_cancel(struct gfs2_glock *gl)
189{
190 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
191 dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
192}
193
194static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
195{
196 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
197 int error;
198
199 if (fsname == NULL) {
200 fs_info(sdp, "no fsname found\n");
201 return -EINVAL;
202 }
203
204 error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm,
205 DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
206 (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
207 GDLM_LVB_SIZE);
208 if (error)
209 printk(KERN_ERR "dlm_new_lockspace error %d", error);
210
211 return error;
212}
213
214static void gdlm_unmount(struct gfs2_sbd *sdp)
215{
216 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
217
218 if (ls->ls_dlm) {
219 dlm_release_lockspace(ls->ls_dlm, 2);
220 ls->ls_dlm = NULL;
221 }
222}
223
/*
 * Option patterns for the lock_dlm protocol: jid, id, first and nodir,
 * each taking a decimal value.  The table ends with the Opt_err entry.
 */
static const match_table_t dlm_tokens = {
	{ Opt_jid, "jid=%d"},
	{ Opt_id, "id=%d"},
	{ Opt_first, "first=%d"},
	{ Opt_nodir, "nodir=%d"},
	{ Opt_err, NULL },
};

/* Lock module operations for the "lock_dlm" protocol. */
const struct lm_lockops gfs2_dlm_ops = {
	.lm_proto_name = "lock_dlm",
	.lm_mount = gdlm_mount,
	.lm_unmount = gdlm_unmount,
	.lm_put_lock = gdlm_put_lock,
	.lm_lock = gdlm_lock,
	.lm_cancel = gdlm_cancel,
	.lm_tokens = &dlm_tokens,
};
241
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
deleted file mode 100644
index 523243a13a21..000000000000
--- a/fs/gfs2/locking.c
+++ /dev/null
@@ -1,232 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19#include <linux/lm_interface.h>
20
21struct lmh_wrapper {
22 struct list_head lw_list;
23 const struct lm_lockops *lw_ops;
24};
25
26static int nolock_mount(char *table_name, char *host_data,
27 lm_callback_t cb, void *cb_data,
28 unsigned int min_lvb_size, int flags,
29 struct lm_lockstruct *lockstruct,
30 struct kobject *fskobj);
31
32/* List of registered low-level locking protocols. A file system selects one
33 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
34
35static const struct lm_lockops nolock_ops = {
36 .lm_proto_name = "lock_nolock",
37 .lm_mount = nolock_mount,
38};
39
40static struct lmh_wrapper nolock_proto = {
41 .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list),
42 .lw_ops = &nolock_ops,
43};
44
45static LIST_HEAD(lmh_list);
46static DEFINE_MUTEX(lmh_lock);
47
48static int nolock_mount(char *table_name, char *host_data,
49 lm_callback_t cb, void *cb_data,
50 unsigned int min_lvb_size, int flags,
51 struct lm_lockstruct *lockstruct,
52 struct kobject *fskobj)
53{
54 char *c;
55 unsigned int jid;
56
57 c = strstr(host_data, "jid=");
58 if (!c)
59 jid = 0;
60 else {
61 c += 4;
62 sscanf(c, "%u", &jid);
63 }
64
65 lockstruct->ls_jid = jid;
66 lockstruct->ls_first = 1;
67 lockstruct->ls_lvb_size = min_lvb_size;
68 lockstruct->ls_ops = &nolock_ops;
69 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
70
71 return 0;
72}
73
74/**
75 * gfs2_register_lockproto - Register a low-level locking protocol
76 * @proto: the protocol definition
77 *
78 * Returns: 0 on success, -EXXX on failure
79 */
80
81int gfs2_register_lockproto(const struct lm_lockops *proto)
82{
83 struct lmh_wrapper *lw;
84
85 mutex_lock(&lmh_lock);
86
87 list_for_each_entry(lw, &lmh_list, lw_list) {
88 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
89 mutex_unlock(&lmh_lock);
90 printk(KERN_INFO "GFS2: protocol %s already exists\n",
91 proto->lm_proto_name);
92 return -EEXIST;
93 }
94 }
95
96 lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
97 if (!lw) {
98 mutex_unlock(&lmh_lock);
99 return -ENOMEM;
100 }
101
102 lw->lw_ops = proto;
103 list_add(&lw->lw_list, &lmh_list);
104
105 mutex_unlock(&lmh_lock);
106
107 return 0;
108}
109
110/**
111 * gfs2_unregister_lockproto - Unregister a low-level locking protocol
112 * @proto: the protocol definition
113 *
114 */
115
116void gfs2_unregister_lockproto(const struct lm_lockops *proto)
117{
118 struct lmh_wrapper *lw;
119
120 mutex_lock(&lmh_lock);
121
122 list_for_each_entry(lw, &lmh_list, lw_list) {
123 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
124 list_del(&lw->lw_list);
125 mutex_unlock(&lmh_lock);
126 kfree(lw);
127 return;
128 }
129 }
130
131 mutex_unlock(&lmh_lock);
132
133 printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
134 proto->lm_proto_name);
135}
136
137/**
138 * gfs2_mount_lockproto - Mount a lock protocol
139 * @proto_name - the name of the protocol
140 * @table_name - the name of the lock space
141 * @host_data - data specific to this host
142 * @cb - the callback to the code using the lock module
143 * @sdp - The GFS2 superblock
144 * @min_lvb_size - the minimum LVB size that the caller can deal with
145 * @flags - LM_MFLAG_*
146 * @lockstruct - a structure returned describing the mount
147 *
148 * Returns: 0 on success, -EXXX on failure
149 */
150
151int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
152 lm_callback_t cb, void *cb_data,
153 unsigned int min_lvb_size, int flags,
154 struct lm_lockstruct *lockstruct,
155 struct kobject *fskobj)
156{
157 struct lmh_wrapper *lw = NULL;
158 int try = 0;
159 int error, found;
160
161
162retry:
163 mutex_lock(&lmh_lock);
164
165 if (list_empty(&nolock_proto.lw_list))
166 list_add(&nolock_proto.lw_list, &lmh_list);
167
168 found = 0;
169 list_for_each_entry(lw, &lmh_list, lw_list) {
170 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
171 found = 1;
172 break;
173 }
174 }
175
176 if (!found) {
177 if (!try && capable(CAP_SYS_MODULE)) {
178 try = 1;
179 mutex_unlock(&lmh_lock);
180 request_module(proto_name);
181 goto retry;
182 }
183 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
184 error = -ENOENT;
185 goto out;
186 }
187
188 if (lw->lw_ops->lm_owner &&
189 !try_module_get(lw->lw_ops->lm_owner)) {
190 try = 0;
191 mutex_unlock(&lmh_lock);
192 msleep(1000);
193 goto retry;
194 }
195
196 error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
197 min_lvb_size, flags, lockstruct, fskobj);
198 if (error)
199 module_put(lw->lw_ops->lm_owner);
200out:
201 mutex_unlock(&lmh_lock);
202 return error;
203}
204
205void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
206{
207 mutex_lock(&lmh_lock);
208 if (lockstruct->ls_ops->lm_unmount)
209 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
210 if (lockstruct->ls_ops->lm_owner)
211 module_put(lockstruct->ls_ops->lm_owner);
212 mutex_unlock(&lmh_lock);
213}
214
215/**
216 * gfs2_withdraw_lockproto - abnormally unmount a lock module
217 * @lockstruct: the lockstruct passed into mount
218 *
219 */
220
221void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
222{
223 mutex_lock(&lmh_lock);
224 lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
225 if (lockstruct->ls_ops->lm_owner)
226 module_put(lockstruct->ls_ops->lm_owner);
227 mutex_unlock(&lmh_lock);
228}
229
230EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
231EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);
232
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
deleted file mode 100644
index 2609bb6cd013..000000000000
--- a/fs/gfs2/locking/dlm/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
1obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
2lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o
3
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
deleted file mode 100644
index 2482c9047505..000000000000
--- a/fs/gfs2/locking/dlm/lock.c
+++ /dev/null
@@ -1,708 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12static char junk_lvb[GDLM_LVB_SIZE];
13
14
15/* convert dlm lock-mode to gfs lock-state */
16
17static s16 gdlm_make_lmstate(s16 dlmmode)
18{
19 switch (dlmmode) {
20 case DLM_LOCK_IV:
21 case DLM_LOCK_NL:
22 return LM_ST_UNLOCKED;
23 case DLM_LOCK_EX:
24 return LM_ST_EXCLUSIVE;
25 case DLM_LOCK_CW:
26 return LM_ST_DEFERRED;
27 case DLM_LOCK_PR:
28 return LM_ST_SHARED;
29 }
30 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
31 return -1;
32}
33
34/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
35 thread gets to it. */
36
37static void queue_submit(struct gdlm_lock *lp)
38{
39 struct gdlm_ls *ls = lp->ls;
40
41 spin_lock(&ls->async_lock);
42 list_add_tail(&lp->delay_list, &ls->submit);
43 spin_unlock(&ls->async_lock);
44 wake_up(&ls->thread_wait);
45}
46
47static void wake_up_ast(struct gdlm_lock *lp)
48{
49 clear_bit(LFL_AST_WAIT, &lp->flags);
50 smp_mb__after_clear_bit();
51 wake_up_bit(&lp->flags, LFL_AST_WAIT);
52}
53
54static void gdlm_delete_lp(struct gdlm_lock *lp)
55{
56 struct gdlm_ls *ls = lp->ls;
57
58 spin_lock(&ls->async_lock);
59 if (!list_empty(&lp->delay_list))
60 list_del_init(&lp->delay_list);
61 ls->all_locks_count--;
62 spin_unlock(&ls->async_lock);
63
64 kfree(lp);
65}
66
67static void gdlm_queue_delayed(struct gdlm_lock *lp)
68{
69 struct gdlm_ls *ls = lp->ls;
70
71 spin_lock(&ls->async_lock);
72 list_add_tail(&lp->delay_list, &ls->delayed);
73 spin_unlock(&ls->async_lock);
74}
75
76static void process_complete(struct gdlm_lock *lp)
77{
78 struct gdlm_ls *ls = lp->ls;
79 struct lm_async_cb acb;
80
81 memset(&acb, 0, sizeof(acb));
82
83 if (lp->lksb.sb_status == -DLM_ECANCEL) {
84 log_info("complete dlm cancel %x,%llx flags %lx",
85 lp->lockname.ln_type,
86 (unsigned long long)lp->lockname.ln_number,
87 lp->flags);
88
89 lp->req = lp->cur;
90 acb.lc_ret |= LM_OUT_CANCELED;
91 if (lp->cur == DLM_LOCK_IV)
92 lp->lksb.sb_lkid = 0;
93 goto out;
94 }
95
96 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
97 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
98 log_info("unlock sb_status %d %x,%llx flags %lx",
99 lp->lksb.sb_status, lp->lockname.ln_type,
100 (unsigned long long)lp->lockname.ln_number,
101 lp->flags);
102 return;
103 }
104
105 lp->cur = DLM_LOCK_IV;
106 lp->req = DLM_LOCK_IV;
107 lp->lksb.sb_lkid = 0;
108
109 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
110 gdlm_delete_lp(lp);
111 return;
112 }
113 goto out;
114 }
115
116 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
117 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
118
119 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
120 if (lp->req == DLM_LOCK_PR)
121 lp->req = DLM_LOCK_CW;
122 else if (lp->req == DLM_LOCK_CW)
123 lp->req = DLM_LOCK_PR;
124 }
125
126 /*
127 * A canceled lock request. The lock was just taken off the delayed
128 * list and was never even submitted to dlm.
129 */
130
131 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
132 log_info("complete internal cancel %x,%llx",
133 lp->lockname.ln_type,
134 (unsigned long long)lp->lockname.ln_number);
135 lp->req = lp->cur;
136 acb.lc_ret |= LM_OUT_CANCELED;
137 goto out;
138 }
139
140 /*
141 * An error occured.
142 */
143
144 if (lp->lksb.sb_status) {
145 /* a "normal" error */
146 if ((lp->lksb.sb_status == -EAGAIN) &&
147 (lp->lkf & DLM_LKF_NOQUEUE)) {
148 lp->req = lp->cur;
149 if (lp->cur == DLM_LOCK_IV)
150 lp->lksb.sb_lkid = 0;
151 goto out;
152 }
153
154 /* this could only happen with cancels I think */
155 log_info("ast sb_status %d %x,%llx flags %lx",
156 lp->lksb.sb_status, lp->lockname.ln_type,
157 (unsigned long long)lp->lockname.ln_number,
158 lp->flags);
159 return;
160 }
161
162 /*
163 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
164 */
165
166 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
167 wake_up_ast(lp);
168 return;
169 }
170
171 /*
172 * A lock has been demoted to NL because it initially completed during
173 * BLOCK_LOCKS. Now it must be requested in the originally requested
174 * mode.
175 */
176
177 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
178 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
179 lp->lockname.ln_type,
180 (unsigned long long)lp->lockname.ln_number);
181 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
182 lp->lockname.ln_type,
183 (unsigned long long)lp->lockname.ln_number);
184
185 lp->cur = DLM_LOCK_NL;
186 lp->req = lp->prev_req;
187 lp->prev_req = DLM_LOCK_IV;
188 lp->lkf &= ~DLM_LKF_CONVDEADLK;
189
190 set_bit(LFL_NOCACHE, &lp->flags);
191
192 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
193 !test_bit(LFL_NOBLOCK, &lp->flags))
194 gdlm_queue_delayed(lp);
195 else
196 queue_submit(lp);
197 return;
198 }
199
200 /*
201 * A request is granted during dlm recovery. It may be granted
202 * because the locks of a failed node were cleared. In that case,
203 * there may be inconsistent data beneath this lock and we must wait
204 * for recovery to complete to use it. When gfs recovery is done this
205 * granted lock will be converted to NL and then reacquired in this
206 * granted state.
207 */
208
209 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
210 !test_bit(LFL_NOBLOCK, &lp->flags) &&
211 lp->req != DLM_LOCK_NL) {
212
213 lp->cur = lp->req;
214 lp->prev_req = lp->req;
215 lp->req = DLM_LOCK_NL;
216 lp->lkf |= DLM_LKF_CONVERT;
217 lp->lkf &= ~DLM_LKF_CONVDEADLK;
218
219 log_debug("rereq %x,%llx id %x %d,%d",
220 lp->lockname.ln_type,
221 (unsigned long long)lp->lockname.ln_number,
222 lp->lksb.sb_lkid, lp->cur, lp->req);
223
224 set_bit(LFL_REREQUEST, &lp->flags);
225 queue_submit(lp);
226 return;
227 }
228
229 /*
230 * DLM demoted the lock to NL before it was granted so GFS must be
231 * told it cannot cache data for this lock.
232 */
233
234 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
235 set_bit(LFL_NOCACHE, &lp->flags);
236
237out:
238 /*
239 * This is an internal lock_dlm lock
240 */
241
242 if (test_bit(LFL_INLOCK, &lp->flags)) {
243 clear_bit(LFL_NOBLOCK, &lp->flags);
244 lp->cur = lp->req;
245 wake_up_ast(lp);
246 return;
247 }
248
249 /*
250 * Normal completion of a lock request. Tell GFS it now has the lock.
251 */
252
253 clear_bit(LFL_NOBLOCK, &lp->flags);
254 lp->cur = lp->req;
255
256 acb.lc_name = lp->lockname;
257 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
258
259 ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
260}
261
262static void gdlm_ast(void *astarg)
263{
264 struct gdlm_lock *lp = astarg;
265 clear_bit(LFL_ACTIVE, &lp->flags);
266 process_complete(lp);
267}
268
269static void process_blocking(struct gdlm_lock *lp, int bast_mode)
270{
271 struct gdlm_ls *ls = lp->ls;
272 unsigned int cb = 0;
273
274 switch (gdlm_make_lmstate(bast_mode)) {
275 case LM_ST_EXCLUSIVE:
276 cb = LM_CB_NEED_E;
277 break;
278 case LM_ST_DEFERRED:
279 cb = LM_CB_NEED_D;
280 break;
281 case LM_ST_SHARED:
282 cb = LM_CB_NEED_S;
283 break;
284 default:
285 gdlm_assert(0, "unknown bast mode %u", bast_mode);
286 }
287
288 ls->fscb(ls->sdp, cb, &lp->lockname);
289}
290
291
292static void gdlm_bast(void *astarg, int mode)
293{
294 struct gdlm_lock *lp = astarg;
295
296 if (!mode) {
297 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
298 lp->lockname.ln_type,
299 (unsigned long long)lp->lockname.ln_number);
300 return;
301 }
302
303 process_blocking(lp, mode);
304}
305
306/* convert gfs lock-state to dlm lock-mode */
307
308static s16 make_mode(s16 lmstate)
309{
310 switch (lmstate) {
311 case LM_ST_UNLOCKED:
312 return DLM_LOCK_NL;
313 case LM_ST_EXCLUSIVE:
314 return DLM_LOCK_EX;
315 case LM_ST_DEFERRED:
316 return DLM_LOCK_CW;
317 case LM_ST_SHARED:
318 return DLM_LOCK_PR;
319 }
320 gdlm_assert(0, "unknown LM state %d", lmstate);
321 return -1;
322}
323
324
325/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
326 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
327
328static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
329{
330 s16 cur = make_mode(cur_state);
331 if (lp->cur != DLM_LOCK_IV)
332 gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
333}
334
335static inline unsigned int make_flags(struct gdlm_lock *lp,
336 unsigned int gfs_flags,
337 s16 cur, s16 req)
338{
339 unsigned int lkf = 0;
340
341 if (gfs_flags & LM_FLAG_TRY)
342 lkf |= DLM_LKF_NOQUEUE;
343
344 if (gfs_flags & LM_FLAG_TRY_1CB) {
345 lkf |= DLM_LKF_NOQUEUE;
346 lkf |= DLM_LKF_NOQUEUEBAST;
347 }
348
349 if (gfs_flags & LM_FLAG_PRIORITY) {
350 lkf |= DLM_LKF_NOORDER;
351 lkf |= DLM_LKF_HEADQUE;
352 }
353
354 if (gfs_flags & LM_FLAG_ANY) {
355 if (req == DLM_LOCK_PR)
356 lkf |= DLM_LKF_ALTCW;
357 else if (req == DLM_LOCK_CW)
358 lkf |= DLM_LKF_ALTPR;
359 }
360
361 if (lp->lksb.sb_lkid != 0) {
362 lkf |= DLM_LKF_CONVERT;
363 }
364
365 if (lp->lvb)
366 lkf |= DLM_LKF_VALBLK;
367
368 return lkf;
369}
370
371/* make_strname - convert GFS lock numbers to a string */
372
373static inline void make_strname(const struct lm_lockname *lockname,
374 struct gdlm_strname *str)
375{
376 sprintf(str->name, "%8x%16llx", lockname->ln_type,
377 (unsigned long long)lockname->ln_number);
378 str->namelen = GDLM_STRNAME_BYTES;
379}
380
381static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
382 struct gdlm_lock **lpp)
383{
384 struct gdlm_lock *lp;
385
386 lp = kzalloc(sizeof(struct gdlm_lock), GFP_NOFS);
387 if (!lp)
388 return -ENOMEM;
389
390 lp->lockname = *name;
391 make_strname(name, &lp->strname);
392 lp->ls = ls;
393 lp->cur = DLM_LOCK_IV;
394 INIT_LIST_HEAD(&lp->delay_list);
395
396 spin_lock(&ls->async_lock);
397 ls->all_locks_count++;
398 spin_unlock(&ls->async_lock);
399
400 *lpp = lp;
401 return 0;
402}
403
404int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
405 void **lockp)
406{
407 struct gdlm_lock *lp;
408 int error;
409
410 error = gdlm_create_lp(lockspace, name, &lp);
411
412 *lockp = lp;
413 return error;
414}
415
/* lm_put_lock operation: dispose of a lock obtained from gdlm_get_lock(). */
void gdlm_put_lock(void *lock)
{
	struct gdlm_lock *lp = lock;

	gdlm_delete_lp(lp);
}
420
421unsigned int gdlm_do_lock(struct gdlm_lock *lp)
422{
423 struct gdlm_ls *ls = lp->ls;
424 int error, bast = 1;
425
426 /*
427 * When recovery is in progress, delay lock requests for submission
428 * once recovery is done. Requests for recovery (NOEXP) and unlocks
429 * can pass.
430 */
431
432 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
433 !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
434 gdlm_queue_delayed(lp);
435 return LM_OUT_ASYNC;
436 }
437
438 /*
439 * Submit the actual lock request.
440 */
441
442 if (test_bit(LFL_NOBAST, &lp->flags))
443 bast = 0;
444
445 set_bit(LFL_ACTIVE, &lp->flags);
446
447 log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
448 (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
449 lp->cur, lp->req, lp->lkf);
450
451 error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
452 lp->strname.name, lp->strname.namelen, 0, gdlm_ast,
453 lp, bast ? gdlm_bast : NULL);
454
455 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
456 lp->lksb.sb_status = -EAGAIN;
457 gdlm_ast(lp);
458 error = 0;
459 }
460
461 if (error) {
462 log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
463 "flags=%lx", ls->fsname, lp->lockname.ln_type,
464 (unsigned long long)lp->lockname.ln_number, error,
465 lp->cur, lp->req, lp->lkf, lp->flags);
466 return LM_OUT_ERROR;
467 }
468 return LM_OUT_ASYNC;
469}
470
471static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
472{
473 struct gdlm_ls *ls = lp->ls;
474 unsigned int lkf = 0;
475 int error;
476
477 set_bit(LFL_DLM_UNLOCK, &lp->flags);
478 set_bit(LFL_ACTIVE, &lp->flags);
479
480 if (lp->lvb)
481 lkf = DLM_LKF_VALBLK;
482
483 log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
484 (unsigned long long)lp->lockname.ln_number,
485 lp->lksb.sb_lkid, lp->cur, lkf);
486
487 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
488
489 if (error) {
490 log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
491 "flags=%lx", ls->fsname, lp->lockname.ln_type,
492 (unsigned long long)lp->lockname.ln_number, error,
493 lp->cur, lp->req, lp->lkf, lp->flags);
494 return LM_OUT_ERROR;
495 }
496 return LM_OUT_ASYNC;
497}
498
499unsigned int gdlm_lock(void *lock, unsigned int cur_state,
500 unsigned int req_state, unsigned int flags)
501{
502 struct gdlm_lock *lp = lock;
503
504 if (req_state == LM_ST_UNLOCKED)
505 return gdlm_unlock(lock, cur_state);
506
507 if (req_state == LM_ST_UNLOCKED)
508 return gdlm_unlock(lock, cur_state);
509
510 clear_bit(LFL_DLM_CANCEL, &lp->flags);
511 if (flags & LM_FLAG_NOEXP)
512 set_bit(LFL_NOBLOCK, &lp->flags);
513
514 check_cur_state(lp, cur_state);
515 lp->req = make_mode(req_state);
516 lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
517
518 return gdlm_do_lock(lp);
519}
520
521unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
522{
523 struct gdlm_lock *lp = lock;
524
525 clear_bit(LFL_DLM_CANCEL, &lp->flags);
526 if (lp->cur == DLM_LOCK_IV)
527 return 0;
528 return gdlm_do_unlock(lp);
529}
530
531void gdlm_cancel(void *lock)
532{
533 struct gdlm_lock *lp = lock;
534 struct gdlm_ls *ls = lp->ls;
535 int error, delay_list = 0;
536
537 if (test_bit(LFL_DLM_CANCEL, &lp->flags))
538 return;
539
540 log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
541 (unsigned long long)lp->lockname.ln_number, lp->flags);
542
543 spin_lock(&ls->async_lock);
544 if (!list_empty(&lp->delay_list)) {
545 list_del_init(&lp->delay_list);
546 delay_list = 1;
547 }
548 spin_unlock(&ls->async_lock);
549
550 if (delay_list) {
551 set_bit(LFL_CANCEL, &lp->flags);
552 set_bit(LFL_ACTIVE, &lp->flags);
553 gdlm_ast(lp);
554 return;
555 }
556
557 if (!test_bit(LFL_ACTIVE, &lp->flags) ||
558 test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
559 log_info("gdlm_cancel skip %x,%llx flags %lx",
560 lp->lockname.ln_type,
561 (unsigned long long)lp->lockname.ln_number, lp->flags);
562 return;
563 }
564
565 /* the lock is blocked in the dlm */
566
567 set_bit(LFL_DLM_CANCEL, &lp->flags);
568 set_bit(LFL_ACTIVE, &lp->flags);
569
570 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
571 NULL, lp);
572
573 log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
574 lp->lockname.ln_type,
575 (unsigned long long)lp->lockname.ln_number, lp->flags);
576
577 if (error == -EBUSY)
578 clear_bit(LFL_DLM_CANCEL, &lp->flags);
579}
580
581static int gdlm_add_lvb(struct gdlm_lock *lp)
582{
583 char *lvb;
584
585 lvb = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
586 if (!lvb)
587 return -ENOMEM;
588
589 lp->lksb.sb_lvbptr = lvb;
590 lp->lvb = lvb;
591 return 0;
592}
593
594static void gdlm_del_lvb(struct gdlm_lock *lp)
595{
596 kfree(lp->lvb);
597 lp->lvb = NULL;
598 lp->lksb.sb_lvbptr = NULL;
599}
600
/*
 * Action callback passed to wait_on_bit() by hold_null_lock(): give up the
 * CPU and report 0.  @word is not used.
 */
static int gdlm_ast_wait(void *word)
{
	schedule();

	return 0;
}
606
607/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
608 the completion) because gfs won't call hold_lvb() during a callback (from
609 the context of a lock_dlm thread). */
610
611static int hold_null_lock(struct gdlm_lock *lp)
612{
613 struct gdlm_lock *lpn = NULL;
614 int error;
615
616 if (lp->hold_null) {
617 printk(KERN_INFO "lock_dlm: lvb already held\n");
618 return 0;
619 }
620
621 error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
622 if (error)
623 goto out;
624
625 lpn->lksb.sb_lvbptr = junk_lvb;
626 lpn->lvb = junk_lvb;
627
628 lpn->req = DLM_LOCK_NL;
629 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
630 set_bit(LFL_NOBAST, &lpn->flags);
631 set_bit(LFL_INLOCK, &lpn->flags);
632 set_bit(LFL_AST_WAIT, &lpn->flags);
633
634 gdlm_do_lock(lpn);
635 wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
636 error = lpn->lksb.sb_status;
637 if (error) {
638 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
639 error);
640 gdlm_delete_lp(lpn);
641 lpn = NULL;
642 }
643out:
644 lp->hold_null = lpn;
645 return error;
646}
647
648/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
649 the completion) because gfs may call unhold_lvb() during a callback (from
650 the context of a lock_dlm thread) which could cause a deadlock since the
651 other lock_dlm thread could be engaged in recovery. */
652
653static void unhold_null_lock(struct gdlm_lock *lp)
654{
655 struct gdlm_lock *lpn = lp->hold_null;
656
657 gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
658 (unsigned long long)lp->lockname.ln_number);
659 lpn->lksb.sb_lvbptr = NULL;
660 lpn->lvb = NULL;
661 set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
662 gdlm_do_unlock(lpn);
663 lp->hold_null = NULL;
664}
665
666/* Acquire a NL lock because gfs requires the value block to remain
667 intact on the resource while the lvb is "held" even if it's holding no locks
668 on the resource. */
669
670int gdlm_hold_lvb(void *lock, char **lvbp)
671{
672 struct gdlm_lock *lp = lock;
673 int error;
674
675 error = gdlm_add_lvb(lp);
676 if (error)
677 return error;
678
679 *lvbp = lp->lvb;
680
681 error = hold_null_lock(lp);
682 if (error)
683 gdlm_del_lvb(lp);
684
685 return error;
686}
687
/*
 * lm_unhold_lvb operation: drop the internal NL lock and then free the
 * value block attached to @lock.  @lvb is not consulted.
 */
void gdlm_unhold_lvb(void *lock, char *lvb)
{
	struct gdlm_lock *held = lock;

	unhold_null_lock(held);
	gdlm_del_lvb(held);
}
695
696void gdlm_submit_delayed(struct gdlm_ls *ls)
697{
698 struct gdlm_lock *lp, *safe;
699
700 spin_lock(&ls->async_lock);
701 list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
702 list_del_init(&lp->delay_list);
703 list_add_tail(&lp->delay_list, &ls->submit);
704 }
705 spin_unlock(&ls->async_lock);
706 wake_up(&ls->thread_wait);
707}
708
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
deleted file mode 100644
index 3c98e7c6f93b..000000000000
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ /dev/null
@@ -1,166 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef LOCK_DLM_DOT_H
11#define LOCK_DLM_DOT_H
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/spinlock.h>
16#include <linux/types.h>
17#include <linux/string.h>
18#include <linux/list.h>
19#include <linux/socket.h>
20#include <linux/delay.h>
21#include <linux/kthread.h>
22#include <linux/kobject.h>
23#include <linux/fcntl.h>
24#include <linux/wait.h>
25#include <net/sock.h>
26
27#include <linux/dlm.h>
28#include <linux/dlm_plock.h>
29#include <linux/lm_interface.h>
30
31/*
32 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
33 * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
34 * as "lock_dlm".
35 */
36
37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 0
40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128
42
43/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
44 We sprintf these numbers into a 24 byte string of hex values to make them
45 human-readable (to make debugging simpler.) */
46
/* Textual resource name handed to dlm_lock(); filled in by make_strname(). */
47struct gdlm_strname {
/* hex rendering of the lock type and number */
48 unsigned char name[GDLM_STRNAME_BYTES];
/* number of bytes of name passed to the dlm; set to GDLM_STRNAME_BYTES */
49 unsigned short namelen;
50};
51
/* Bit numbers for gdlm_ls.flags (used with set_bit/test_bit). */
52enum {
/* set/cleared through the sysfs "block" file; while set, gdlm_do_lock()
   parks ordinary requests on the delayed list */
53 DFL_BLOCK_LOCKS = 0,
/* not referenced in the code visible here */
54 DFL_SPECTATOR = 1,
/* set through the sysfs "withdraw" file; gdlm_withdraw() waits for it and
   gdlm_unmount() skips its teardown when it is set */
55 DFL_WITHDRAW = 2,
56};
57
/* Per-mount lockspace state; allocated by init_gdlm(), freed at unmount. */
58struct gdlm_ls {
/* value of the "id=" mount argument */
59 u32 id;
/* value of the "jid=" mount argument; -1 until parsed */
60 int jid;
/* value of the "first=" mount argument */
61 int first;
/* set to 1 by gdlm_others_may_mount() */
62 int first_done;
/* DFL_* bits */
63 unsigned long flags;
/* sysfs object registered as "lock_module" under the fs kobject */
64 struct kobject kobj;
/* table name up to the ':' */
65 char clustername[GDLM_NAME_LEN];
/* table name after the ':'; also the dlm lockspace name */
66 char fsname[GDLM_NAME_LEN];
/* flags argument passed to the mount operation */
67 int fsflags;
/* handle returned by dlm_new_lockspace() */
68 dlm_lockspace_t *dlm_lockspace;
/* GFS callback and the argument it is always called with */
69 lm_callback_t fscb;
70 struct gfs2_sbd *sdp;
/* journal id written through the sysfs "recover" file */
71 int recover_jid;
/* jid and message recorded by gdlm_recovery_done() */
72 int recover_jid_done;
73 int recover_jid_status;
/* protects delayed, submit and all_locks_count */
74 spinlock_t async_lock;
/* requests waiting to be moved to submit by gdlm_submit_delayed() */
75 struct list_head delayed;
/* requests waiting to be submitted by the lock_dlm thread */
76 struct list_head submit;
/* incremented for every lock created; must be zero at unmount */
77 u32 all_locks_count;
/* gdlm_withdraw() sleeps here until DFL_WITHDRAW is set */
78 wait_queue_head_t wait_control;
/* the "lock_dlm" kthread and the queue it sleeps on */
79 struct task_struct *thread;
80 wait_queue_head_t thread_wait;
81};
82
/* Bit numbers for gdlm_lock.flags (used with set_bit/test_bit). */
83enum {
/* request may proceed while DFL_BLOCK_LOCKS is set (LM_FLAG_NOEXP) */
84 LFL_NOBLOCK = 0,
/* set when the dlm reports DLM_SBF_DEMOTED for the completed request */
85 LFL_NOCACHE = 1,
/* an unlock has been submitted via gdlm_do_unlock() */
86 LFL_DLM_UNLOCK = 2,
/* a DLM_LKF_CANCEL unlock has been submitted via gdlm_cancel() */
87 LFL_DLM_CANCEL = 3,
/* not referenced in the code visible here */
88 LFL_SYNC_LVB = 4,
/* not referenced in the code visible here */
89 LFL_FORCE_PROMOTE = 5,
/* granted during recovery; being converted to NL to be re-requested */
90 LFL_REREQUEST = 6,
/* a request is outstanding; cleared by gdlm_ast() */
91 LFL_ACTIVE = 7,
/* internal lock_dlm lock: completion wakes a waiter instead of calling GFS */
92 LFL_INLOCK = 8,
/* request was cancelled while still queued inside lock_dlm */
93 LFL_CANCEL = 9,
/* submit without a blocking callback */
94 LFL_NOBAST = 10,
/* not referenced in the code visible here */
95 LFL_HEADQUE = 11,
/* set by unhold_null_lock() before unlocking the internal NL lock */
96 LFL_UNLOCK_DELETE = 12,
/* hold_null_lock() waits on this bit for the completion */
97 LFL_AST_WAIT = 13,
98};
99
/* One lock as seen by lock_dlm; created by gdlm_create_lp(). */
100struct gdlm_lock {
/* owning lockspace */
101 struct gdlm_ls *ls;
/* GFS name of the lock and its string form passed to the dlm */
102 struct lm_lockname lockname;
103 struct gdlm_strname strname;
/* value block buffer, or NULL when none is attached */
104 char *lvb;
/* status block passed to dlm_lock(); holds the lock id and status */
105 struct dlm_lksb lksb;
106
/* current dlm mode; DLM_LOCK_IV until the first request completes */
107 s16 cur;
/* dlm mode of the request being made */
108 s16 req;
/* mode saved while a lock granted during recovery is converted to NL */
109 s16 prev_req;
110 u32 lkf; /* dlm flags DLM_LKF_ */
111 unsigned long flags; /* lock_dlm flags LFL_ */
112
113 struct list_head delay_list; /* delayed */
114 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
115};
116
/*
 * gdlm_assert - fatal consistency check.
 * If @assertion is false, print the stringified assertion and the formatted
 * message at KERN_EMERG and call BUG().
 */
117#define gdlm_assert(assertion, fmt, args...) \
118do { \
119 if (unlikely(!(assertion))) { \
120 printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
121 "lock_dlm: " fmt "\n", \
122 #assertion, ##args); \
123 BUG(); \
124 } \
125} while (0)
126
/*
 * Logging helpers.  Every message is prefixed with "lock_dlm: " and gets a
 * trailing newline, so @fmt must be a string literal without one.
 * log_debug() expands to nothing unless LOCK_DLM_LOG_DEBUG is defined.
 */
127#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
128#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
129#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
130#ifdef LOCK_DLM_LOG_DEBUG
131#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
132#else
133#define log_debug(fmt, arg...)
134#endif
135
136/* sysfs.c */
137
138int gdlm_sysfs_init(void);
139void gdlm_sysfs_exit(void);
140int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
141void gdlm_kobject_release(struct gdlm_ls *);
142
143/* thread.c */
144
145int gdlm_init_threads(struct gdlm_ls *);
146void gdlm_release_threads(struct gdlm_ls *);
147
148/* lock.c */
149
150void gdlm_submit_delayed(struct gdlm_ls *);
151unsigned int gdlm_do_lock(struct gdlm_lock *);
152
153int gdlm_get_lock(void *, struct lm_lockname *, void **);
154void gdlm_put_lock(void *);
155unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int);
156unsigned int gdlm_unlock(void *, unsigned int);
157void gdlm_cancel(void *);
158int gdlm_hold_lvb(void *, char **);
159void gdlm_unhold_lvb(void *, char *);
160
161/* mount.c */
162
163extern const struct lm_lockops gdlm_ops;
164
165#endif
166
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
deleted file mode 100644
index b9a03a7ff801..000000000000
--- a/fs/gfs2/locking/dlm/main.c
+++ /dev/null
@@ -1,48 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/init.h>
11
12#include "lock_dlm.h"
13
14static int __init init_lock_dlm(void)
15{
16 int error;
17
18 error = gfs2_register_lockproto(&gdlm_ops);
19 if (error) {
20 printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
21 error);
22 return error;
23 }
24
25 error = gdlm_sysfs_init();
26 if (error) {
27 gfs2_unregister_lockproto(&gdlm_ops);
28 return error;
29 }
30
31 printk(KERN_INFO
32 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
33 return 0;
34}
35
36static void __exit exit_lock_dlm(void)
37{
38 gdlm_sysfs_exit();
39 gfs2_unregister_lockproto(&gdlm_ops);
40}
41
42module_init(init_lock_dlm);
43module_exit(exit_lock_dlm);
44
45MODULE_DESCRIPTION("GFS DLM Locking Module");
46MODULE_AUTHOR("Red Hat, Inc.");
47MODULE_LICENSE("GPL");
48
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
deleted file mode 100644
index 1aa7eb6a0226..000000000000
--- a/fs/gfs2/locking/dlm/mount.c
+++ /dev/null
@@ -1,276 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12const struct lm_lockops gdlm_ops;
13
14
15static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
16 int flags, char *table_name)
17{
18 struct gdlm_ls *ls;
19 char buf[256], *p;
20
21 ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
22 if (!ls)
23 return NULL;
24
25 ls->fscb = cb;
26 ls->sdp = sdp;
27 ls->fsflags = flags;
28 spin_lock_init(&ls->async_lock);
29 INIT_LIST_HEAD(&ls->delayed);
30 INIT_LIST_HEAD(&ls->submit);
31 init_waitqueue_head(&ls->thread_wait);
32 init_waitqueue_head(&ls->wait_control);
33 ls->jid = -1;
34
35 strncpy(buf, table_name, 256);
36 buf[255] = '\0';
37
38 p = strchr(buf, ':');
39 if (!p) {
40 log_info("invalid table_name \"%s\"", table_name);
41 kfree(ls);
42 return NULL;
43 }
44 *p = '\0';
45 p++;
46
47 strncpy(ls->clustername, buf, GDLM_NAME_LEN);
48 strncpy(ls->fsname, p, GDLM_NAME_LEN);
49
50 return ls;
51}
52
53static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
54{
55 char data[256];
56 char *options, *x, *y;
57 int error = 0;
58
59 memset(data, 0, 256);
60 strncpy(data, data_arg, 255);
61
62 if (!strlen(data)) {
63 log_error("no mount options, (u)mount helpers not installed");
64 return -EINVAL;
65 }
66
67 for (options = data; (x = strsep(&options, ":")); ) {
68 if (!*x)
69 continue;
70
71 y = strchr(x, '=');
72 if (y)
73 *y++ = 0;
74
75 if (!strcmp(x, "jid")) {
76 if (!y) {
77 log_error("need argument to jid");
78 error = -EINVAL;
79 break;
80 }
81 sscanf(y, "%u", &ls->jid);
82
83 } else if (!strcmp(x, "first")) {
84 if (!y) {
85 log_error("need argument to first");
86 error = -EINVAL;
87 break;
88 }
89 sscanf(y, "%u", &ls->first);
90
91 } else if (!strcmp(x, "id")) {
92 if (!y) {
93 log_error("need argument to id");
94 error = -EINVAL;
95 break;
96 }
97 sscanf(y, "%u", &ls->id);
98
99 } else if (!strcmp(x, "nodir")) {
100 if (!y) {
101 log_error("need argument to nodir");
102 error = -EINVAL;
103 break;
104 }
105 sscanf(y, "%u", nodir);
106
107 } else {
108 log_error("unkonwn option: %s", x);
109 error = -EINVAL;
110 break;
111 }
112 }
113
114 return error;
115}
116
117static int gdlm_mount(char *table_name, char *host_data,
118 lm_callback_t cb, void *cb_data,
119 unsigned int min_lvb_size, int flags,
120 struct lm_lockstruct *lockstruct,
121 struct kobject *fskobj)
122{
123 struct gdlm_ls *ls;
124 int error = -ENOMEM, nodir = 0;
125
126 if (min_lvb_size > GDLM_LVB_SIZE)
127 goto out;
128
129 ls = init_gdlm(cb, cb_data, flags, table_name);
130 if (!ls)
131 goto out;
132
133 error = make_args(ls, host_data, &nodir);
134 if (error)
135 goto out;
136
137 error = gdlm_init_threads(ls);
138 if (error)
139 goto out_free;
140
141 error = gdlm_kobject_setup(ls, fskobj);
142 if (error)
143 goto out_thread;
144
145 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
146 &ls->dlm_lockspace,
147 DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
148 (nodir ? DLM_LSFL_NODIR : 0),
149 GDLM_LVB_SIZE);
150 if (error) {
151 log_error("dlm_new_lockspace error %d", error);
152 goto out_kobj;
153 }
154
155 lockstruct->ls_jid = ls->jid;
156 lockstruct->ls_first = ls->first;
157 lockstruct->ls_lockspace = ls;
158 lockstruct->ls_ops = &gdlm_ops;
159 lockstruct->ls_flags = 0;
160 lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
161 return 0;
162
163out_kobj:
164 gdlm_kobject_release(ls);
165out_thread:
166 gdlm_release_threads(ls);
167out_free:
168 kfree(ls);
169out:
170 return error;
171}
172
173static void gdlm_unmount(void *lockspace)
174{
175 struct gdlm_ls *ls = lockspace;
176
177 log_debug("unmount flags %lx", ls->flags);
178
179 /* FIXME: serialize unmount and withdraw in case they
180 happen at once. Also, if unmount follows withdraw,
181 wait for withdraw to finish. */
182
183 if (test_bit(DFL_WITHDRAW, &ls->flags))
184 goto out;
185
186 gdlm_kobject_release(ls);
187 dlm_release_lockspace(ls->dlm_lockspace, 2);
188 gdlm_release_threads(ls);
189 BUG_ON(ls->all_locks_count);
190out:
191 kfree(ls);
192}
193
194static void gdlm_recovery_done(void *lockspace, unsigned int jid,
195 unsigned int message)
196{
197 char env_jid[20];
198 char env_status[20];
199 char *envp[] = { env_jid, env_status, NULL };
200 struct gdlm_ls *ls = lockspace;
201 ls->recover_jid_done = jid;
202 ls->recover_jid_status = message;
203 sprintf(env_jid, "JID=%d", jid);
204 sprintf(env_status, "RECOVERY=%s",
205 message == LM_RD_SUCCESS ? "Done" : "Failed");
206 kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
207}
208
209static void gdlm_others_may_mount(void *lockspace)
210{
211 char *message = "FIRSTMOUNT=Done";
212 char *envp[] = { message, NULL };
213 struct gdlm_ls *ls = lockspace;
214 ls->first_done = 1;
215 kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
216}
217
218/* Userspace gets the offline uevent, blocks new gfs locks on
219 other mounters, and lets us know (sets WITHDRAW flag). Then,
220 userspace leaves the mount group while we leave the lockspace. */
221
222static void gdlm_withdraw(void *lockspace)
223{
224 struct gdlm_ls *ls = lockspace;
225
226 kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
227
228 wait_event_interruptible(ls->wait_control,
229 test_bit(DFL_WITHDRAW, &ls->flags));
230
231 dlm_release_lockspace(ls->dlm_lockspace, 2);
232 gdlm_release_threads(ls);
233 gdlm_kobject_release(ls);
234}
235
236static int gdlm_plock(void *lockspace, struct lm_lockname *name,
237 struct file *file, int cmd, struct file_lock *fl)
238{
239 struct gdlm_ls *ls = lockspace;
240 return dlm_posix_lock(ls->dlm_lockspace, name->ln_number, file, cmd, fl);
241}
242
243static int gdlm_punlock(void *lockspace, struct lm_lockname *name,
244 struct file *file, struct file_lock *fl)
245{
246 struct gdlm_ls *ls = lockspace;
247 return dlm_posix_unlock(ls->dlm_lockspace, name->ln_number, file, fl);
248}
249
250static int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
251 struct file *file, struct file_lock *fl)
252{
253 struct gdlm_ls *ls = lockspace;
254 return dlm_posix_get(ls->dlm_lockspace, name->ln_number, file, fl);
255}
256
/*
 * Operations table for the "lock_dlm" protocol.  It is registered with
 * GFS2 at module init and stored in lockstruct->ls_ops by gdlm_mount().
 */
257const struct lm_lockops gdlm_ops = {
258 .lm_proto_name = "lock_dlm",
259 .lm_mount = gdlm_mount,
260 .lm_others_may_mount = gdlm_others_may_mount,
261 .lm_unmount = gdlm_unmount,
262 .lm_withdraw = gdlm_withdraw,
263 .lm_get_lock = gdlm_get_lock,
264 .lm_put_lock = gdlm_put_lock,
265 .lm_lock = gdlm_lock,
266 .lm_unlock = gdlm_unlock,
267 .lm_plock = gdlm_plock,
268 .lm_punlock = gdlm_punlock,
269 .lm_plock_get = gdlm_plock_get,
270 .lm_cancel = gdlm_cancel,
271 .lm_hold_lvb = gdlm_hold_lvb,
272 .lm_unhold_lvb = gdlm_unhold_lvb,
273 .lm_recovery_done = gdlm_recovery_done,
274 .lm_owner = THIS_MODULE,
275};
276
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
deleted file mode 100644
index 9b7edcf7bd49..000000000000
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ /dev/null
@@ -1,226 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/ctype.h>
11#include <linux/stat.h>
12
13#include "lock_dlm.h"
14
15static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
16{
17 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
18}
19
20static ssize_t block_show(struct gdlm_ls *ls, char *buf)
21{
22 ssize_t ret;
23 int val = 0;
24
25 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
26 val = 1;
27 ret = sprintf(buf, "%d\n", val);
28 return ret;
29}
30
31static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
32{
33 ssize_t ret = len;
34 int val;
35
36 val = simple_strtol(buf, NULL, 0);
37
38 if (val == 1)
39 set_bit(DFL_BLOCK_LOCKS, &ls->flags);
40 else if (val == 0) {
41 clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
42 gdlm_submit_delayed(ls);
43 } else {
44 ret = -EINVAL;
45 }
46 return ret;
47}
48
49static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
50{
51 ssize_t ret;
52 int val = 0;
53
54 if (test_bit(DFL_WITHDRAW, &ls->flags))
55 val = 1;
56 ret = sprintf(buf, "%d\n", val);
57 return ret;
58}
59
60static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
61{
62 ssize_t ret = len;
63 int val;
64
65 val = simple_strtol(buf, NULL, 0);
66
67 if (val == 1)
68 set_bit(DFL_WITHDRAW, &ls->flags);
69 else
70 ret = -EINVAL;
71 wake_up(&ls->wait_control);
72 return ret;
73}
74
75static ssize_t id_show(struct gdlm_ls *ls, char *buf)
76{
77 return sprintf(buf, "%u\n", ls->id);
78}
79
80static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
81{
82 return sprintf(buf, "%d\n", ls->jid);
83}
84
85static ssize_t first_show(struct gdlm_ls *ls, char *buf)
86{
87 return sprintf(buf, "%d\n", ls->first);
88}
89
90static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
91{
92 return sprintf(buf, "%d\n", ls->first_done);
93}
94
95static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
96{
97 return sprintf(buf, "%d\n", ls->recover_jid);
98}
99
100static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
101{
102 ls->recover_jid = simple_strtol(buf, NULL, 0);
103 ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid);
104 return len;
105}
106
107static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
108{
109 return sprintf(buf, "%d\n", ls->recover_jid_done);
110}
111
112static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
113{
114 return sprintf(buf, "%d\n", ls->recover_jid_status);
115}
116
/*
 * A sysfs attribute of a lockspace together with its handlers.  Either
 * handler may be NULL; gdlm_attr_show()/gdlm_attr_store() check for that.
 */
117struct gdlm_attr {
118 struct attribute attr;
/* format the value into the buffer; returns the number of bytes written */
119 ssize_t (*show)(struct gdlm_ls *, char *);
/* parse the buffer of the given length; returns bytes consumed or an error */
120 ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
121};
122
/*
 * GDLM_ATTR(name, mode, show, store) defines a static struct gdlm_attr
 * called gdlm_attr_<name>.  The list below declares every attribute of the
 * lockspace; read-only ones use mode 0444 and a NULL store handler.
 */
123#define GDLM_ATTR(_name,_mode,_show,_store) \
124static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
125
126GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
127GDLM_ATTR(block, 0644, block_show, block_store);
128GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
129GDLM_ATTR(id, 0444, id_show, NULL);
130GDLM_ATTR(jid, 0444, jid_show, NULL);
131GDLM_ATTR(first, 0444, first_show, NULL);
132GDLM_ATTR(first_done, 0444, first_done_show, NULL);
133GDLM_ATTR(recover, 0644, recover_show, recover_store);
134GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
135GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
136
/* NULL-terminated attribute list used as the default attrs of gdlm_ktype. */
137static struct attribute *gdlm_attrs[] = {
138 &gdlm_attr_proto_name.attr,
139 &gdlm_attr_block.attr,
140 &gdlm_attr_withdraw.attr,
141 &gdlm_attr_id.attr,
142 &gdlm_attr_jid.attr,
143 &gdlm_attr_first.attr,
144 &gdlm_attr_first_done.attr,
145 &gdlm_attr_recover.attr,
146 &gdlm_attr_recover_done.attr,
147 &gdlm_attr_recover_status.attr,
148 NULL,
149};
150
151static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
152 char *buf)
153{
154 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
155 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
156 return a->show ? a->show(ls, buf) : 0;
157}
158
159static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
160 const char *buf, size_t len)
161{
162 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
163 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
164 return a->store ? a->store(ls, buf, len) : len;
165}
166
/* sysfs_ops for gdlm_ktype: route reads and writes to the gdlm_attr handlers. */
167static struct sysfs_ops gdlm_attr_ops = {
168 .show = gdlm_attr_show,
169 .store = gdlm_attr_store,
170};
171
/*
 * kobject type of the per-lockspace "lock_module" object.  No release
 * callback is set here.
 */
172static struct kobj_type gdlm_ktype = {
173 .default_attrs = gdlm_attrs,
174 .sysfs_ops = &gdlm_attr_ops,
175};
176
177static struct kset *gdlm_kset;
178
179int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
180{
181 int error;
182
183 ls->kobj.kset = gdlm_kset;
184 error = kobject_init_and_add(&ls->kobj, &gdlm_ktype, fskobj,
185 "lock_module");
186 if (error)
187 log_error("can't register kobj %d", error);
188 kobject_uevent(&ls->kobj, KOBJ_ADD);
189
190 return error;
191}
192
193void gdlm_kobject_release(struct gdlm_ls *ls)
194{
195 kobject_put(&ls->kobj);
196}
197
198static int gdlm_uevent(struct kset *kset, struct kobject *kobj,
199 struct kobj_uevent_env *env)
200{
201 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
202 add_uevent_var(env, "LOCKTABLE=%s:%s", ls->clustername, ls->fsname);
203 add_uevent_var(env, "LOCKPROTO=lock_dlm");
204 return 0;
205}
206
/* uevent hooks for the "lock_dlm" kset created in gdlm_sysfs_init(). */
207static struct kset_uevent_ops gdlm_uevent_ops = {
208 .uevent = gdlm_uevent,
209};
210
211
212int gdlm_sysfs_init(void)
213{
214 gdlm_kset = kset_create_and_add("lock_dlm", &gdlm_uevent_ops, kernel_kobj);
215 if (!gdlm_kset) {
216 printk(KERN_WARNING "%s: can not create kset\n", __func__);
217 return -ENOMEM;
218 }
219 return 0;
220}
221
222void gdlm_sysfs_exit(void)
223{
224 kset_unregister(gdlm_kset);
225}
226
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
deleted file mode 100644
index 38823efd698c..000000000000
--- a/fs/gfs2/locking/dlm/thread.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12static inline int no_work(struct gdlm_ls *ls)
13{
14 int ret;
15
16 spin_lock(&ls->async_lock);
17 ret = list_empty(&ls->submit);
18 spin_unlock(&ls->async_lock);
19
20 return ret;
21}
22
23static int gdlm_thread(void *data)
24{
25 struct gdlm_ls *ls = (struct gdlm_ls *) data;
26 struct gdlm_lock *lp = NULL;
27
28 while (!kthread_should_stop()) {
29 wait_event_interruptible(ls->thread_wait,
30 !no_work(ls) || kthread_should_stop());
31
32 spin_lock(&ls->async_lock);
33
34 if (!list_empty(&ls->submit)) {
35 lp = list_entry(ls->submit.next, struct gdlm_lock,
36 delay_list);
37 list_del_init(&lp->delay_list);
38 spin_unlock(&ls->async_lock);
39 gdlm_do_lock(lp);
40 spin_lock(&ls->async_lock);
41 }
42 spin_unlock(&ls->async_lock);
43 }
44
45 return 0;
46}
47
48int gdlm_init_threads(struct gdlm_ls *ls)
49{
50 struct task_struct *p;
51 int error;
52
53 p = kthread_run(gdlm_thread, ls, "lock_dlm");
54 error = IS_ERR(p);
55 if (error) {
56 log_error("can't start lock_dlm thread %d", error);
57 return error;
58 }
59 ls->thread = p;
60
61 return 0;
62}
63
64void gdlm_release_threads(struct gdlm_ls *ls)
65{
66 kthread_stop(ls->thread);
67}
68
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index ad305854bdc6..98918a756410 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -14,7 +14,6 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18#include <linux/delay.h> 17#include <linux/delay.h>
19#include <linux/kthread.h> 18#include <linux/kthread.h>
20#include <linux/freezer.h> 19#include <linux/freezer.h>
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4390f6f4047d..80e4f5f898bb 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -13,7 +13,6 @@
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7cacfde32194..a6892ed0840a 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,7 +14,6 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/atomic.h> 17#include <asm/atomic.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
@@ -23,6 +22,12 @@
23#include "sys.h" 22#include "sys.h"
24#include "util.h" 23#include "util.h"
25#include "glock.h" 24#include "glock.h"
25#include "quota.h"
26
/*
 * Shrinker descriptor whose callback is gfs2_shrink_qd_memory().
 * init_gfs2_fs() registers it; the init failure path and exit_gfs2_fs()
 * unregister it.
 */
27static struct shrinker qd_shrinker = {
28 .shrink = gfs2_shrink_qd_memory,
29 .seeks = DEFAULT_SEEKS,
30};
26 31
27static void gfs2_init_inode_once(void *foo) 32static void gfs2_init_inode_once(void *foo)
28{ 33{
@@ -41,8 +46,6 @@ static void gfs2_init_glock_once(void *foo)
41 INIT_HLIST_NODE(&gl->gl_list); 46 INIT_HLIST_NODE(&gl->gl_list);
42 spin_lock_init(&gl->gl_spin); 47 spin_lock_init(&gl->gl_spin);
43 INIT_LIST_HEAD(&gl->gl_holders); 48 INIT_LIST_HEAD(&gl->gl_holders);
44 gl->gl_lvb = NULL;
45 atomic_set(&gl->gl_lvb_count, 0);
46 INIT_LIST_HEAD(&gl->gl_lru); 49 INIT_LIST_HEAD(&gl->gl_lru);
47 INIT_LIST_HEAD(&gl->gl_ail_list); 50 INIT_LIST_HEAD(&gl->gl_ail_list);
48 atomic_set(&gl->gl_ail_count, 0); 51 atomic_set(&gl->gl_ail_count, 0);
@@ -100,6 +103,8 @@ static int __init init_gfs2_fs(void)
100 if (!gfs2_quotad_cachep) 103 if (!gfs2_quotad_cachep)
101 goto fail; 104 goto fail;
102 105
106 register_shrinker(&qd_shrinker);
107
103 error = register_filesystem(&gfs2_fs_type); 108 error = register_filesystem(&gfs2_fs_type);
104 if (error) 109 if (error)
105 goto fail; 110 goto fail;
@@ -117,6 +122,7 @@ static int __init init_gfs2_fs(void)
117fail_unregister: 122fail_unregister:
118 unregister_filesystem(&gfs2_fs_type); 123 unregister_filesystem(&gfs2_fs_type);
119fail: 124fail:
125 unregister_shrinker(&qd_shrinker);
120 gfs2_glock_exit(); 126 gfs2_glock_exit();
121 127
122 if (gfs2_quotad_cachep) 128 if (gfs2_quotad_cachep)
@@ -145,6 +151,7 @@ fail:
145 151
146static void __exit exit_gfs2_fs(void) 152static void __exit exit_gfs2_fs(void)
147{ 153{
154 unregister_shrinker(&qd_shrinker);
148 gfs2_glock_exit(); 155 gfs2_glock_exit();
149 gfs2_unregister_debugfs(); 156 gfs2_unregister_debugfs();
150 unregister_filesystem(&gfs2_fs_type); 157 unregister_filesystem(&gfs2_fs_type);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 09853620c951..8d6f13256b26 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -19,7 +19,6 @@
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h>
23 22
24#include "gfs2.h" 23#include "gfs2.h"
25#include "incore.h" 24#include "incore.h"
@@ -90,27 +89,6 @@ void gfs2_aspace_put(struct inode *aspace)
90} 89}
91 90
92/** 91/**
93 * gfs2_meta_inval - Invalidate all buffers associated with a glock
94 * @gl: the glock
95 *
96 */
97
98void gfs2_meta_inval(struct gfs2_glock *gl)
99{
100 struct gfs2_sbd *sdp = gl->gl_sbd;
101 struct inode *aspace = gl->gl_aspace;
102 struct address_space *mapping = gl->gl_aspace->i_mapping;
103
 /* The glock's AIL count is asserted to be zero before truncating. */
104 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
105
 /*
  * Drop every page of the glock's address space from offset 0 onwards.
  * NOTE(review): i_writecount is raised only for the duration of the
  * truncate; the reason is not visible in this listing -- confirm.
  */
106 atomic_inc(&aspace->i_writecount);
107 truncate_inode_pages(mapping, 0);
108 atomic_dec(&aspace->i_writecount);
109
 /* The mapping is asserted to hold no pages afterwards. */
110 gfs2_assert_withdraw(sdp, !mapping->nrpages);
111}
112
113/**
114 * gfs2_meta_sync - Sync all buffers associated with a glock 92 * gfs2_meta_sync - Sync all buffers associated with a glock
115 * @gl: The glock 93 * @gl: The glock
116 * 94 *
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index b1a5f3674d43..de270c2f9b63 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -40,7 +40,6 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
40struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); 40struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
41void gfs2_aspace_put(struct inode *aspace); 41void gfs2_aspace_put(struct inode *aspace);
42 42
43void gfs2_meta_inval(struct gfs2_glock *gl);
44void gfs2_meta_sync(struct gfs2_glock *gl); 43void gfs2_meta_sync(struct gfs2_glock *gl);
45 44
46struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); 45struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 3cb0a44ba023..f7e8527a21e0 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -12,12 +12,11 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/lm_interface.h>
16#include <linux/parser.h> 15#include <linux/parser.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
20#include "mount.h" 19#include "super.h"
21#include "sys.h" 20#include "sys.h"
22#include "util.h" 21#include "util.h"
23 22
@@ -37,11 +36,15 @@ enum {
37 Opt_quota_off, 36 Opt_quota_off,
38 Opt_quota_account, 37 Opt_quota_account,
39 Opt_quota_on, 38 Opt_quota_on,
39 Opt_quota,
40 Opt_noquota,
40 Opt_suiddir, 41 Opt_suiddir,
41 Opt_nosuiddir, 42 Opt_nosuiddir,
42 Opt_data_writeback, 43 Opt_data_writeback,
43 Opt_data_ordered, 44 Opt_data_ordered,
44 Opt_meta, 45 Opt_meta,
46 Opt_discard,
47 Opt_nodiscard,
45 Opt_err, 48 Opt_err,
46}; 49};
47 50
@@ -61,11 +64,15 @@ static const match_table_t tokens = {
61 {Opt_quota_off, "quota=off"}, 64 {Opt_quota_off, "quota=off"},
62 {Opt_quota_account, "quota=account"}, 65 {Opt_quota_account, "quota=account"},
63 {Opt_quota_on, "quota=on"}, 66 {Opt_quota_on, "quota=on"},
67 {Opt_quota, "quota"},
68 {Opt_noquota, "noquota"},
64 {Opt_suiddir, "suiddir"}, 69 {Opt_suiddir, "suiddir"},
65 {Opt_nosuiddir, "nosuiddir"}, 70 {Opt_nosuiddir, "nosuiddir"},
66 {Opt_data_writeback, "data=writeback"}, 71 {Opt_data_writeback, "data=writeback"},
67 {Opt_data_ordered, "data=ordered"}, 72 {Opt_data_ordered, "data=ordered"},
68 {Opt_meta, "meta"}, 73 {Opt_meta, "meta"},
74 {Opt_discard, "discard"},
75 {Opt_nodiscard, "nodiscard"},
69 {Opt_err, NULL} 76 {Opt_err, NULL}
70}; 77};
71 78
@@ -77,101 +84,46 @@ static const match_table_t tokens = {
77 * Return: errno 84 * Return: errno
78 */ 85 */
79 86
80int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) 87int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
81{ 88{
82 struct gfs2_args *args = &sdp->sd_args; 89 char *o;
83 char *data = data_arg; 90 int token;
84 char *options, *o, *v; 91 substring_t tmp[MAX_OPT_ARGS];
85 int error = 0;
86
87 if (!remount) {
88 /* Set some defaults */
89 args->ar_quota = GFS2_QUOTA_DEFAULT;
90 args->ar_data = GFS2_DATA_DEFAULT;
91 }
92 92
93 /* Split the options into tokens with the "," character and 93 /* Split the options into tokens with the "," character and
94 process them */ 94 process them */
95 95
96 for (options = data; (o = strsep(&options, ",")); ) { 96 while (1) {
97 int token; 97 o = strsep(&options, ",");
98 substring_t tmp[MAX_OPT_ARGS]; 98 if (o == NULL)
99 99 break;
100 if (!*o) 100 if (*o == '\0')
101 continue; 101 continue;
102 102
103 token = match_token(o, tokens, tmp); 103 token = match_token(o, tokens, tmp);
104 switch (token) { 104 switch (token) {
105 case Opt_lockproto: 105 case Opt_lockproto:
106 v = match_strdup(&tmp[0]); 106 match_strlcpy(args->ar_lockproto, &tmp[0],
107 if (!v) { 107 GFS2_LOCKNAME_LEN);
108 fs_info(sdp, "no memory for lockproto\n");
109 error = -ENOMEM;
110 goto out_error;
111 }
112
113 if (remount && strcmp(v, args->ar_lockproto)) {
114 kfree(v);
115 goto cant_remount;
116 }
117
118 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
119 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
120 kfree(v);
121 break; 108 break;
122 case Opt_locktable: 109 case Opt_locktable:
123 v = match_strdup(&tmp[0]); 110 match_strlcpy(args->ar_locktable, &tmp[0],
124 if (!v) { 111 GFS2_LOCKNAME_LEN);
125 fs_info(sdp, "no memory for locktable\n");
126 error = -ENOMEM;
127 goto out_error;
128 }
129
130 if (remount && strcmp(v, args->ar_locktable)) {
131 kfree(v);
132 goto cant_remount;
133 }
134
135 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
136 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
137 kfree(v);
138 break; 112 break;
139 case Opt_hostdata: 113 case Opt_hostdata:
140 v = match_strdup(&tmp[0]); 114 match_strlcpy(args->ar_hostdata, &tmp[0],
141 if (!v) { 115 GFS2_LOCKNAME_LEN);
142 fs_info(sdp, "no memory for hostdata\n");
143 error = -ENOMEM;
144 goto out_error;
145 }
146
147 if (remount && strcmp(v, args->ar_hostdata)) {
148 kfree(v);
149 goto cant_remount;
150 }
151
152 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
153 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
154 kfree(v);
155 break; 116 break;
156 case Opt_spectator: 117 case Opt_spectator:
157 if (remount && !args->ar_spectator)
158 goto cant_remount;
159 args->ar_spectator = 1; 118 args->ar_spectator = 1;
160 sdp->sd_vfs->s_flags |= MS_RDONLY;
161 break; 119 break;
162 case Opt_ignore_local_fs: 120 case Opt_ignore_local_fs:
163 if (remount && !args->ar_ignore_local_fs)
164 goto cant_remount;
165 args->ar_ignore_local_fs = 1; 121 args->ar_ignore_local_fs = 1;
166 break; 122 break;
167 case Opt_localflocks: 123 case Opt_localflocks:
168 if (remount && !args->ar_localflocks)
169 goto cant_remount;
170 args->ar_localflocks = 1; 124 args->ar_localflocks = 1;
171 break; 125 break;
172 case Opt_localcaching: 126 case Opt_localcaching:
173 if (remount && !args->ar_localcaching)
174 goto cant_remount;
175 args->ar_localcaching = 1; 127 args->ar_localcaching = 1;
176 break; 128 break;
177 case Opt_debug: 129 case Opt_debug:
@@ -181,25 +133,23 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
181 args->ar_debug = 0; 133 args->ar_debug = 0;
182 break; 134 break;
183 case Opt_upgrade: 135 case Opt_upgrade:
184 if (remount && !args->ar_upgrade)
185 goto cant_remount;
186 args->ar_upgrade = 1; 136 args->ar_upgrade = 1;
187 break; 137 break;
188 case Opt_acl: 138 case Opt_acl:
189 args->ar_posix_acl = 1; 139 args->ar_posix_acl = 1;
190 sdp->sd_vfs->s_flags |= MS_POSIXACL;
191 break; 140 break;
192 case Opt_noacl: 141 case Opt_noacl:
193 args->ar_posix_acl = 0; 142 args->ar_posix_acl = 0;
194 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
195 break; 143 break;
196 case Opt_quota_off: 144 case Opt_quota_off:
145 case Opt_noquota:
197 args->ar_quota = GFS2_QUOTA_OFF; 146 args->ar_quota = GFS2_QUOTA_OFF;
198 break; 147 break;
199 case Opt_quota_account: 148 case Opt_quota_account:
200 args->ar_quota = GFS2_QUOTA_ACCOUNT; 149 args->ar_quota = GFS2_QUOTA_ACCOUNT;
201 break; 150 break;
202 case Opt_quota_on: 151 case Opt_quota_on:
152 case Opt_quota:
203 args->ar_quota = GFS2_QUOTA_ON; 153 args->ar_quota = GFS2_QUOTA_ON;
204 break; 154 break;
205 case Opt_suiddir: 155 case Opt_suiddir:
@@ -215,29 +165,21 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
215 args->ar_data = GFS2_DATA_ORDERED; 165 args->ar_data = GFS2_DATA_ORDERED;
216 break; 166 break;
217 case Opt_meta: 167 case Opt_meta:
218 if (remount && args->ar_meta != 1)
219 goto cant_remount;
220 args->ar_meta = 1; 168 args->ar_meta = 1;
221 break; 169 break;
170 case Opt_discard:
171 args->ar_discard = 1;
172 break;
173 case Opt_nodiscard:
174 args->ar_discard = 0;
175 break;
222 case Opt_err: 176 case Opt_err:
223 default: 177 default:
224 fs_info(sdp, "unknown option: %s\n", o); 178 fs_info(sdp, "invalid mount option: %s\n", o);
225 error = -EINVAL; 179 return -EINVAL;
226 goto out_error;
227 } 180 }
228 } 181 }
229 182
230out_error: 183 return 0;
231 if (error)
232 fs_info(sdp, "invalid mount option(s)\n");
233
234 if (data != data_arg)
235 kfree(data);
236
237 return error;
238
239cant_remount:
240 fs_info(sdp, "can't remount with option %s\n", o);
241 return -EINVAL;
242} 184}
243 185
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
deleted file mode 100644
index 401288acfdf3..000000000000
--- a/fs/gfs2/mount.h
+++ /dev/null
@@ -1,17 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
13struct gfs2_sbd;
14
15int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
16
17#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 4ddab67867eb..a6dde1751e17 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -19,7 +19,6 @@
19#include <linux/writeback.h> 19#include <linux/writeback.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h>
23#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
24 23
25#include "gfs2.h" 24#include "gfs2.h"
@@ -442,6 +441,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
442 */ 441 */
443 if (unlikely(page->index)) { 442 if (unlikely(page->index)) {
444 zero_user(page, 0, PAGE_CACHE_SIZE); 443 zero_user(page, 0, PAGE_CACHE_SIZE);
444 SetPageUptodate(page);
445 return 0; 445 return 0;
446 } 446 }
447 447
@@ -1096,6 +1096,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1096 .releasepage = gfs2_releasepage, 1096 .releasepage = gfs2_releasepage,
1097 .direct_IO = gfs2_direct_IO, 1097 .direct_IO = gfs2_direct_IO,
1098 .migratepage = buffer_migrate_page, 1098 .migratepage = buffer_migrate_page,
1099 .is_partially_uptodate = block_is_partially_uptodate,
1099}; 1100};
1100 1101
1101static const struct address_space_operations gfs2_ordered_aops = { 1102static const struct address_space_operations gfs2_ordered_aops = {
@@ -1111,6 +1112,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
1111 .releasepage = gfs2_releasepage, 1112 .releasepage = gfs2_releasepage,
1112 .direct_IO = gfs2_direct_IO, 1113 .direct_IO = gfs2_direct_IO,
1113 .migratepage = buffer_migrate_page, 1114 .migratepage = buffer_migrate_page,
1115 .is_partially_uptodate = block_is_partially_uptodate,
1114}; 1116};
1115 1117
1116static const struct address_space_operations gfs2_jdata_aops = { 1118static const struct address_space_operations gfs2_jdata_aops = {
@@ -1125,6 +1127,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
1125 .bmap = gfs2_bmap, 1127 .bmap = gfs2_bmap,
1126 .invalidatepage = gfs2_invalidatepage, 1128 .invalidatepage = gfs2_invalidatepage,
1127 .releasepage = gfs2_releasepage, 1129 .releasepage = gfs2_releasepage,
1130 .is_partially_uptodate = block_is_partially_uptodate,
1128}; 1131};
1129 1132
1130void gfs2_set_aops(struct inode *inode) 1133void gfs2_set_aops(struct inode *inode)
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index c2ad36330ca3..5eb57b044382 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 7fdeb14ddd1a..9200ef221716 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -14,7 +14,6 @@
14#include <linux/exportfs.h> 14#include <linux/exportfs.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
20#include "incore.h" 19#include "incore.h"
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 93fe41b67f97..3b9e8de3500b 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -20,9 +20,10 @@
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/ext2_fs.h> 21#include <linux/ext2_fs.h>
22#include <linux/crc32.h> 22#include <linux/crc32.h>
23#include <linux/lm_interface.h>
24#include <linux/writeback.h> 23#include <linux/writeback.h>
25#include <asm/uaccess.h> 24#include <asm/uaccess.h>
25#include <linux/dlm.h>
26#include <linux/dlm_plock.h>
26 27
27#include "gfs2.h" 28#include "gfs2.h"
28#include "incore.h" 29#include "incore.h"
@@ -354,7 +355,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
354 if (ret) 355 if (ret)
355 goto out; 356 goto out;
356 357
358 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
357 set_bit(GIF_SW_PAGED, &ip->i_flags); 359 set_bit(GIF_SW_PAGED, &ip->i_flags);
360
358 ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); 361 ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
359 if (ret || !alloc_required) 362 if (ret || !alloc_required)
360 goto out_unlock; 363 goto out_unlock;
@@ -560,57 +563,24 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
560 return ret; 563 return ret;
561} 564}
562 565
566#ifdef CONFIG_GFS2_FS_LOCKING_DLM
567
563/** 568/**
564 * gfs2_setlease - acquire/release a file lease 569 * gfs2_setlease - acquire/release a file lease
565 * @file: the file pointer 570 * @file: the file pointer
566 * @arg: lease type 571 * @arg: lease type
567 * @fl: file lock 572 * @fl: file lock
568 * 573 *
574 * We don't currently have a way to enforce a lease across the whole
575 * cluster; until we do, disable leases (by just returning -EINVAL),
576 * unless the administrator has requested purely local locking.
577 *
569 * Returns: errno 578 * Returns: errno
570 */ 579 */
571 580
572static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) 581static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
573{ 582{
574 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 583 return -EINVAL;
575
576 /*
577 * We don't currently have a way to enforce a lease across the whole
578 * cluster; until we do, disable leases (by just returning -EINVAL),
579 * unless the administrator has requested purely local locking.
580 */
581 if (!sdp->sd_args.ar_localflocks)
582 return -EINVAL;
583 return generic_setlease(file, arg, fl);
584}
585
586static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
587 struct file *file, struct file_lock *fl)
588{
589 int error = -EIO;
590 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
591 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
592 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
593 return error;
594}
595
596static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
597 struct file *file, int cmd, struct file_lock *fl)
598{
599 int error = -EIO;
600 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
601 error = sdp->sd_lockstruct.ls_ops->lm_plock(
602 sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
603 return error;
604}
605
606static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
607 struct file *file, struct file_lock *fl)
608{
609 int error = -EIO;
610 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
611 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
612 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
613 return error;
614} 584}
615 585
616/** 586/**
@@ -626,9 +596,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
626{ 596{
627 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 597 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
628 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 598 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
629 struct lm_lockname name = 599 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
630 { .ln_number = ip->i_no_addr,
631 .ln_type = LM_TYPE_PLOCK };
632 600
633 if (!(fl->fl_flags & FL_POSIX)) 601 if (!(fl->fl_flags & FL_POSIX))
634 return -ENOLCK; 602 return -ENOLCK;
@@ -640,12 +608,14 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
640 cmd = F_SETLK; 608 cmd = F_SETLK;
641 fl->fl_type = F_UNLCK; 609 fl->fl_type = F_UNLCK;
642 } 610 }
611 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
612 return -EIO;
643 if (IS_GETLK(cmd)) 613 if (IS_GETLK(cmd))
644 return gfs2_lm_plock_get(sdp, &name, file, fl); 614 return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
645 else if (fl->fl_type == F_UNLCK) 615 else if (fl->fl_type == F_UNLCK)
646 return gfs2_lm_punlock(sdp, &name, file, fl); 616 return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
647 else 617 else
648 return gfs2_lm_plock(sdp, &name, file, cmd, fl); 618 return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
649} 619}
650 620
651static int do_flock(struct file *file, int cmd, struct file_lock *fl) 621static int do_flock(struct file *file, int cmd, struct file_lock *fl)
@@ -732,7 +702,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
732 } 702 }
733} 703}
734 704
735const struct file_operations gfs2_file_fops = { 705const struct file_operations *gfs2_file_fops = &(const struct file_operations){
736 .llseek = gfs2_llseek, 706 .llseek = gfs2_llseek,
737 .read = do_sync_read, 707 .read = do_sync_read,
738 .aio_read = generic_file_aio_read, 708 .aio_read = generic_file_aio_read,
@@ -750,7 +720,7 @@ const struct file_operations gfs2_file_fops = {
750 .setlease = gfs2_setlease, 720 .setlease = gfs2_setlease,
751}; 721};
752 722
753const struct file_operations gfs2_dir_fops = { 723const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
754 .readdir = gfs2_readdir, 724 .readdir = gfs2_readdir,
755 .unlocked_ioctl = gfs2_ioctl, 725 .unlocked_ioctl = gfs2_ioctl,
756 .open = gfs2_open, 726 .open = gfs2_open,
@@ -760,7 +730,9 @@ const struct file_operations gfs2_dir_fops = {
760 .flock = gfs2_flock, 730 .flock = gfs2_flock,
761}; 731};
762 732
763const struct file_operations gfs2_file_fops_nolock = { 733#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
734
735const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){
764 .llseek = gfs2_llseek, 736 .llseek = gfs2_llseek,
765 .read = do_sync_read, 737 .read = do_sync_read,
766 .aio_read = generic_file_aio_read, 738 .aio_read = generic_file_aio_read,
@@ -773,10 +745,10 @@ const struct file_operations gfs2_file_fops_nolock = {
773 .fsync = gfs2_fsync, 745 .fsync = gfs2_fsync,
774 .splice_read = generic_file_splice_read, 746 .splice_read = generic_file_splice_read,
775 .splice_write = generic_file_splice_write, 747 .splice_write = generic_file_splice_write,
776 .setlease = gfs2_setlease, 748 .setlease = generic_setlease,
777}; 749};
778 750
779const struct file_operations gfs2_dir_fops_nolock = { 751const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){
780 .readdir = gfs2_readdir, 752 .readdir = gfs2_readdir,
781 .unlocked_ioctl = gfs2_ioctl, 753 .unlocked_ioctl = gfs2_ioctl,
782 .open = gfs2_open, 754 .open = gfs2_open,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f91eebdde581..51883b3ad89c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,7 +17,6 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h>
21 20
22#include "gfs2.h" 21#include "gfs2.h"
23#include "incore.h" 22#include "incore.h"
@@ -25,7 +24,6 @@
25#include "glock.h" 24#include "glock.h"
26#include "glops.h" 25#include "glops.h"
27#include "inode.h" 26#include "inode.h"
28#include "mount.h"
29#include "recovery.h" 27#include "recovery.h"
30#include "rgrp.h" 28#include "rgrp.h"
31#include "super.h" 29#include "super.h"
@@ -64,7 +62,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
64 gt->gt_quota_warn_period = 10; 62 gt->gt_quota_warn_period = 10;
65 gt->gt_quota_scale_num = 1; 63 gt->gt_quota_scale_num = 1;
66 gt->gt_quota_scale_den = 1; 64 gt->gt_quota_scale_den = 1;
67 gt->gt_quota_cache_secs = 300;
68 gt->gt_quota_quantum = 60; 65 gt->gt_quota_quantum = 60;
69 gt->gt_new_files_jdata = 0; 66 gt->gt_new_files_jdata = 0;
70 gt->gt_max_readahead = 1 << 18; 67 gt->gt_max_readahead = 1 << 18;
@@ -100,7 +97,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
100 mutex_init(&sdp->sd_jindex_mutex); 97 mutex_init(&sdp->sd_jindex_mutex);
101 98
102 INIT_LIST_HEAD(&sdp->sd_quota_list); 99 INIT_LIST_HEAD(&sdp->sd_quota_list);
103 spin_lock_init(&sdp->sd_quota_spin);
104 mutex_init(&sdp->sd_quota_mutex); 100 mutex_init(&sdp->sd_quota_mutex);
105 init_waitqueue_head(&sdp->sd_quota_wait); 101 init_waitqueue_head(&sdp->sd_quota_wait);
106 INIT_LIST_HEAD(&sdp->sd_trunc_list); 102 INIT_LIST_HEAD(&sdp->sd_trunc_list);
@@ -238,6 +234,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
238 234
239 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); 235 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
240 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); 236 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
237 memcpy(sb->sb_uuid, str->sb_uuid, 16);
241} 238}
242 239
243/** 240/**
@@ -299,15 +296,15 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
299 __free_page(page); 296 __free_page(page);
300 return 0; 297 return 0;
301} 298}
299
302/** 300/**
303 * gfs2_read_sb - Read super block 301 * gfs2_read_sb - Read super block
304 * @sdp: The GFS2 superblock 302 * @sdp: The GFS2 superblock
305 * @gl: the glock for the superblock (assumed to be held)
306 * @silent: Don't print message if mount fails 303 * @silent: Don't print message if mount fails
307 * 304 *
308 */ 305 */
309 306
310static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) 307static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
311{ 308{
312 u32 hash_blocks, ind_blocks, leaf_blocks; 309 u32 hash_blocks, ind_blocks, leaf_blocks;
313 u32 tmp_blocks; 310 u32 tmp_blocks;
@@ -527,7 +524,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
527 return ret; 524 return ret;
528 } 525 }
529 526
530 ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); 527 ret = gfs2_read_sb(sdp, silent);
531 if (ret) { 528 if (ret) {
532 fs_err(sdp, "can't read superblock: %d\n", ret); 529 fs_err(sdp, "can't read superblock: %d\n", ret);
533 goto out; 530 goto out;
@@ -630,13 +627,13 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
630 return rc; 627 return rc;
631} 628}
632 629
633static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) 630static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
634{ 631{
635 if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount) 632 char *message = "FIRSTMOUNT=Done";
636 return; 633 char *envp[] = { message, NULL };
637 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 634 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
638 sdp->sd_lockstruct.ls_ops->lm_others_may_mount( 635 ls->ls_first_done = 1;
639 sdp->sd_lockstruct.ls_lockspace); 636 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
640} 637}
641 638
642/** 639/**
@@ -796,7 +793,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
796 } 793 }
797 } 794 }
798 795
799 gfs2_lm_others_may_mount(sdp); 796 gfs2_others_may_mount(sdp);
800 } else if (!sdp->sd_args.ar_spectator) { 797 } else if (!sdp->sd_args.ar_spectator) {
801 error = gfs2_recover_journal(sdp->sd_jdesc); 798 error = gfs2_recover_journal(sdp->sd_jdesc);
802 if (error) { 799 if (error) {
@@ -1005,7 +1002,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
1005 goto fail_quotad; 1002 goto fail_quotad;
1006 1003
1007 sdp->sd_log_flush_time = jiffies; 1004 sdp->sd_log_flush_time = jiffies;
1008 sdp->sd_jindex_refresh_time = jiffies;
1009 1005
1010 p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); 1006 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
1011 error = IS_ERR(p); 1007 error = IS_ERR(p);
@@ -1033,6 +1029,17 @@ fail:
1033 return error; 1029 return error;
1034} 1030}
1035 1031
/*
 * Hostdata tokens accepted for the lock_nolock protocol: only "jid=<int>".
 * gfs2_lm_mount() feeds each ':'-separated hostdata item to match_token()
 * against the table referenced by the selected lm_lockops.
 *
 * NOTE(review): the jid pattern literal ends in "\n", which looks
 * unintended and may stop a plain "jid=N" from matching -- confirm against
 * match_token() semantics.
 */
1032static const match_table_t nolock_tokens = {
1033 { Opt_jid, "jid=%d\n", },
1034 { Opt_err, NULL },
1035};
1036
/*
 * Lock operations used when the protocol name is "lock_nolock".
 * Only the name, the put_lock hook (kmem_cache_free) and the hostdata token
 * table are set. lm_mount is left unset, so gfs2_lm_mount() takes its
 * "lm->lm_mount == NULL" early-return path for this protocol.
 */
1037static const struct lm_lockops nolock_ops = {
1038 .lm_proto_name = "lock_nolock",
1039 .lm_put_lock = kmem_cache_free,
1040 .lm_tokens = &nolock_tokens,
1041};
1042
1036/** 1043/**
1037 * gfs2_lm_mount - mount a locking protocol 1044 * gfs2_lm_mount - mount a locking protocol
1038 * @sdp: the filesystem 1045 * @sdp: the filesystem
@@ -1044,31 +1051,73 @@ fail:
1044 1051
1045static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) 1052static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1046{ 1053{
1047 char *proto = sdp->sd_proto_name; 1054 const struct lm_lockops *lm;
1048 char *table = sdp->sd_table_name; 1055 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
1049 int flags = LM_MFLAG_CONV_NODROP; 1056 struct gfs2_args *args = &sdp->sd_args;
1050 int error; 1057 const char *proto = sdp->sd_proto_name;
1058 const char *table = sdp->sd_table_name;
1059 const char *fsname;
1060 char *o, *options;
1061 int ret;
1051 1062
1052 if (sdp->sd_args.ar_spectator) 1063 if (!strcmp("lock_nolock", proto)) {
1053 flags |= LM_MFLAG_SPECTATOR; 1064 lm = &nolock_ops;
1065 sdp->sd_args.ar_localflocks = 1;
1066 sdp->sd_args.ar_localcaching = 1;
1067#ifdef CONFIG_GFS2_FS_LOCKING_DLM
1068 } else if (!strcmp("lock_dlm", proto)) {
1069 lm = &gfs2_dlm_ops;
1070#endif
1071 } else {
1072 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto);
1073 return -ENOENT;
1074 }
1054 1075
1055 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table); 1076 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
1056 1077
1057 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata, 1078 ls->ls_ops = lm;
1058 gfs2_glock_cb, sdp, 1079 ls->ls_first = 1;
1059 GFS2_MIN_LVB_SIZE, flags, 1080 ls->ls_id = 0;
1060 &sdp->sd_lockstruct, &sdp->sd_kobj);
1061 if (error) {
1062 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
1063 proto, table, sdp->sd_args.ar_hostdata);
1064 goto out;
1065 }
1066 1081
1067 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || 1082 for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
1068 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= 1083 substring_t tmp[MAX_OPT_ARGS];
1069 GFS2_MIN_LVB_SIZE)) { 1084 int token, option;
1070 gfs2_unmount_lockproto(&sdp->sd_lockstruct); 1085
1071 goto out; 1086 if (!o || !*o)
1087 continue;
1088
1089 token = match_token(o, *lm->lm_tokens, tmp);
1090 switch (token) {
1091 case Opt_jid:
1092 ret = match_int(&tmp[0], &option);
1093 if (ret || option < 0)
1094 goto hostdata_error;
1095 ls->ls_jid = option;
1096 break;
1097 case Opt_id:
1098 ret = match_int(&tmp[0], &option);
1099 if (ret)
1100 goto hostdata_error;
1101 ls->ls_id = option;
1102 break;
1103 case Opt_first:
1104 ret = match_int(&tmp[0], &option);
1105 if (ret || (option != 0 && option != 1))
1106 goto hostdata_error;
1107 ls->ls_first = option;
1108 break;
1109 case Opt_nodir:
1110 ret = match_int(&tmp[0], &option);
1111 if (ret || (option != 0 && option != 1))
1112 goto hostdata_error;
1113 ls->ls_nodir = option;
1114 break;
1115 case Opt_err:
1116 default:
1117hostdata_error:
1118 fs_info(sdp, "unknown hostdata (%s)\n", o);
1119 return -EINVAL;
1120 }
1072 } 1121 }
1073 1122
1074 if (sdp->sd_args.ar_spectator) 1123 if (sdp->sd_args.ar_spectator)
@@ -1077,22 +1126,25 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1077 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, 1126 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
1078 sdp->sd_lockstruct.ls_jid); 1127 sdp->sd_lockstruct.ls_jid);
1079 1128
1080 fs_info(sdp, "Joined cluster. Now mounting FS...\n"); 1129 fsname = strchr(table, ':');
1081 1130 if (fsname)
1082 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) && 1131 fsname++;
1083 !sdp->sd_args.ar_ignore_local_fs) { 1132 if (lm->lm_mount == NULL) {
1084 sdp->sd_args.ar_localflocks = 1; 1133 fs_info(sdp, "Now mounting FS...\n");
1085 sdp->sd_args.ar_localcaching = 1; 1134 return 0;
1086 } 1135 }
1087 1136 ret = lm->lm_mount(sdp, fsname);
1088out: 1137 if (ret == 0)
1089 return error; 1138 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
1139 return ret;
1090} 1140}
1091 1141
1092void gfs2_lm_unmount(struct gfs2_sbd *sdp) 1142void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1093{ 1143{
1094 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1144 const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
1095 gfs2_unmount_lockproto(&sdp->sd_lockstruct); 1145 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
1146 lm->lm_unmount)
1147 lm->lm_unmount(sdp);
1096} 1148}
1097 1149
1098/** 1150/**
@@ -1116,12 +1168,20 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1116 return -ENOMEM; 1168 return -ENOMEM;
1117 } 1169 }
1118 1170
1119 error = gfs2_mount_args(sdp, (char *)data, 0); 1171 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
1172 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
1173
1174 error = gfs2_mount_args(sdp, &sdp->sd_args, data);
1120 if (error) { 1175 if (error) {
1121 printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); 1176 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1122 goto fail; 1177 goto fail;
1123 } 1178 }
1124 1179
1180 if (sdp->sd_args.ar_spectator)
1181 sb->s_flags |= MS_RDONLY;
1182 if (sdp->sd_args.ar_posix_acl)
1183 sb->s_flags |= MS_POSIXACL;
1184
1125 sb->s_magic = GFS2_MAGIC; 1185 sb->s_magic = GFS2_MAGIC;
1126 sb->s_op = &gfs2_super_ops; 1186 sb->s_op = &gfs2_super_ops;
1127 sb->s_export_op = &gfs2_export_ops; 1187 sb->s_export_op = &gfs2_export_ops;
@@ -1199,6 +1259,8 @@ fail_sb:
1199 dput(sdp->sd_root_dir); 1259 dput(sdp->sd_root_dir);
1200 if (sdp->sd_master_dir) 1260 if (sdp->sd_master_dir)
1201 dput(sdp->sd_master_dir); 1261 dput(sdp->sd_master_dir);
1262 if (sb->s_root)
1263 dput(sb->s_root);
1202 sb->s_root = NULL; 1264 sb->s_root = NULL;
1203fail_locking: 1265fail_locking:
1204 init_locking(sdp, &mount_gh, UNDO); 1266 init_locking(sdp, &mount_gh, UNDO);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 49877546beb9..abd5429ae285 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,7 +18,6 @@
18#include <linux/posix_acl.h> 18#include <linux/posix_acl.h>
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/crc32.h> 20#include <linux/crc32.h>
21#include <linux/lm_interface.h>
22#include <linux/fiemap.h> 21#include <linux/fiemap.h>
23#include <asm/uaccess.h> 22#include <asm/uaccess.h>
24 23
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 320323d03479..458019569dcb 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -19,7 +19,6 @@
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h> 21#include <linux/crc32.h>
22#include <linux/lm_interface.h>
23#include <linux/time.h> 22#include <linux/time.h>
24 23
25#include "gfs2.h" 24#include "gfs2.h"
@@ -27,7 +26,6 @@
27#include "glock.h" 26#include "glock.h"
28#include "inode.h" 27#include "inode.h"
29#include "log.h" 28#include "log.h"
30#include "mount.h"
31#include "quota.h" 29#include "quota.h"
32#include "recovery.h" 30#include "recovery.h"
33#include "rgrp.h" 31#include "rgrp.h"
@@ -40,6 +38,8 @@
40#include "bmap.h" 38#include "bmap.h"
41#include "meta_io.h" 39#include "meta_io.h"
42 40
41#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
42
43/** 43/**
44 * gfs2_write_inode - Make sure the inode is stable on the disk 44 * gfs2_write_inode - Make sure the inode is stable on the disk
45 * @inode: The inode 45 * @inode: The inode
@@ -435,25 +435,45 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
435static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) 435static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
436{ 436{
437 struct gfs2_sbd *sdp = sb->s_fs_info; 437 struct gfs2_sbd *sdp = sb->s_fs_info;
438 struct gfs2_args args = sdp->sd_args; /* Default to current settings */
438 int error; 439 int error;
439 440
440 error = gfs2_mount_args(sdp, data, 1); 441 error = gfs2_mount_args(sdp, &args, data);
441 if (error) 442 if (error)
442 return error; 443 return error;
443 444
445 /* Not allowed to change locking details */
446 if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
447 strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
448 strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
449 return -EINVAL;
450
451 /* Some flags must not be changed */
452 if (args_neq(&args, &sdp->sd_args, spectator) ||
453 args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
454 args_neq(&args, &sdp->sd_args, localflocks) ||
455 args_neq(&args, &sdp->sd_args, localcaching) ||
456 args_neq(&args, &sdp->sd_args, meta))
457 return -EINVAL;
458
444 if (sdp->sd_args.ar_spectator) 459 if (sdp->sd_args.ar_spectator)
445 *flags |= MS_RDONLY; 460 *flags |= MS_RDONLY;
446 else { 461
447 if (*flags & MS_RDONLY) { 462 if ((sb->s_flags ^ *flags) & MS_RDONLY) {
448 if (!(sb->s_flags & MS_RDONLY)) 463 if (*flags & MS_RDONLY)
449 error = gfs2_make_fs_ro(sdp); 464 error = gfs2_make_fs_ro(sdp);
450 } else if (!(*flags & MS_RDONLY) && 465 else
451 (sb->s_flags & MS_RDONLY)) {
452 error = gfs2_make_fs_rw(sdp); 466 error = gfs2_make_fs_rw(sdp);
453 } 467 if (error)
468 return error;
454 } 469 }
455 470
456 return error; 471 sdp->sd_args = args;
472 if (sdp->sd_args.ar_posix_acl)
473 sb->s_flags |= MS_POSIXACL;
474 else
475 sb->s_flags &= ~MS_POSIXACL;
476 return 0;
457} 477}
458 478
459/** 479/**
@@ -588,6 +608,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
588 } 608 }
589 seq_printf(s, ",data=%s", state); 609 seq_printf(s, ",data=%s", state);
590 } 610 }
611 if (args->ar_discard)
612 seq_printf(s, ",discard");
591 613
592 return 0; 614 return 0;
593} 615}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b08d09696b3e..8d53f66b5bcc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -45,7 +45,6 @@
45#include <linux/fs.h> 45#include <linux/fs.h>
46#include <linux/bio.h> 46#include <linux/bio.h>
47#include <linux/gfs2_ondisk.h> 47#include <linux/gfs2_ondisk.h>
48#include <linux/lm_interface.h>
49#include <linux/kthread.h> 48#include <linux/kthread.h>
50#include <linux/freezer.h> 49#include <linux/freezer.h>
51 50
@@ -80,6 +79,51 @@ struct gfs2_quota_change_host {
80 u32 qc_id; 79 u32 qc_id;
81}; 80};
82 81
82static LIST_HEAD(qd_lru_list);
83static atomic_t qd_lru_count = ATOMIC_INIT(0);
84static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED;
85
86int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
87{
88 struct gfs2_quota_data *qd;
89 struct gfs2_sbd *sdp;
90
91 if (nr == 0)
92 goto out;
93
94 if (!(gfp_mask & __GFP_FS))
95 return -1;
96
97 spin_lock(&qd_lru_lock);
98 while (nr && !list_empty(&qd_lru_list)) {
99 qd = list_entry(qd_lru_list.next,
100 struct gfs2_quota_data, qd_reclaim);
101 sdp = qd->qd_gl->gl_sbd;
102
103 /* Free from the filesystem-specific list */
104 list_del(&qd->qd_list);
105
106 gfs2_assert_warn(sdp, !qd->qd_change);
107 gfs2_assert_warn(sdp, !qd->qd_slot_count);
108 gfs2_assert_warn(sdp, !qd->qd_bh_count);
109
110 gfs2_glock_put(qd->qd_gl);
111 atomic_dec(&sdp->sd_quota_count);
112
113 /* Delete it from the common reclaim list */
114 list_del_init(&qd->qd_reclaim);
115 atomic_dec(&qd_lru_count);
116 spin_unlock(&qd_lru_lock);
117 kmem_cache_free(gfs2_quotad_cachep, qd);
118 spin_lock(&qd_lru_lock);
119 nr--;
120 }
121 spin_unlock(&qd_lru_lock);
122
123out:
124 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
125}
126
83static u64 qd2offset(struct gfs2_quota_data *qd) 127static u64 qd2offset(struct gfs2_quota_data *qd)
84{ 128{
85 u64 offset; 129 u64 offset;
@@ -100,22 +144,18 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
100 if (!qd) 144 if (!qd)
101 return -ENOMEM; 145 return -ENOMEM;
102 146
103 qd->qd_count = 1; 147 atomic_set(&qd->qd_count, 1);
104 qd->qd_id = id; 148 qd->qd_id = id;
105 if (user) 149 if (user)
106 set_bit(QDF_USER, &qd->qd_flags); 150 set_bit(QDF_USER, &qd->qd_flags);
107 qd->qd_slot = -1; 151 qd->qd_slot = -1;
152 INIT_LIST_HEAD(&qd->qd_reclaim);
108 153
109 error = gfs2_glock_get(sdp, 2 * (u64)id + !user, 154 error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
110 &gfs2_quota_glops, CREATE, &qd->qd_gl); 155 &gfs2_quota_glops, CREATE, &qd->qd_gl);
111 if (error) 156 if (error)
112 goto fail; 157 goto fail;
113 158
114 error = gfs2_lvb_hold(qd->qd_gl);
115 gfs2_glock_put(qd->qd_gl);
116 if (error)
117 goto fail;
118
119 *qdp = qd; 159 *qdp = qd;
120 160
121 return 0; 161 return 0;
@@ -135,11 +175,17 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
135 175
136 for (;;) { 176 for (;;) {
137 found = 0; 177 found = 0;
138 spin_lock(&sdp->sd_quota_spin); 178 spin_lock(&qd_lru_lock);
139 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 179 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
140 if (qd->qd_id == id && 180 if (qd->qd_id == id &&
141 !test_bit(QDF_USER, &qd->qd_flags) == !user) { 181 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
142 qd->qd_count++; 182 if (!atomic_read(&qd->qd_count) &&
183 !list_empty(&qd->qd_reclaim)) {
184 /* Remove it from reclaim list */
185 list_del_init(&qd->qd_reclaim);
186 atomic_dec(&qd_lru_count);
187 }
188 atomic_inc(&qd->qd_count);
143 found = 1; 189 found = 1;
144 break; 190 break;
145 } 191 }
@@ -155,11 +201,11 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
155 new_qd = NULL; 201 new_qd = NULL;
156 } 202 }
157 203
158 spin_unlock(&sdp->sd_quota_spin); 204 spin_unlock(&qd_lru_lock);
159 205
160 if (qd || !create) { 206 if (qd || !create) {
161 if (new_qd) { 207 if (new_qd) {
162 gfs2_lvb_unhold(new_qd->qd_gl); 208 gfs2_glock_put(new_qd->qd_gl);
163 kmem_cache_free(gfs2_quotad_cachep, new_qd); 209 kmem_cache_free(gfs2_quotad_cachep, new_qd);
164 } 210 }
165 *qdp = qd; 211 *qdp = qd;
@@ -175,21 +221,18 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
175static void qd_hold(struct gfs2_quota_data *qd) 221static void qd_hold(struct gfs2_quota_data *qd)
176{ 222{
177 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 223 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
178 224 gfs2_assert(sdp, atomic_read(&qd->qd_count));
179 spin_lock(&sdp->sd_quota_spin); 225 atomic_inc(&qd->qd_count);
180 gfs2_assert(sdp, qd->qd_count);
181 qd->qd_count++;
182 spin_unlock(&sdp->sd_quota_spin);
183} 226}
184 227
185static void qd_put(struct gfs2_quota_data *qd) 228static void qd_put(struct gfs2_quota_data *qd)
186{ 229{
187 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 230 if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) {
188 spin_lock(&sdp->sd_quota_spin); 231 /* Add to the reclaim list */
189 gfs2_assert(sdp, qd->qd_count); 232 list_add_tail(&qd->qd_reclaim, &qd_lru_list);
190 if (!--qd->qd_count) 233 atomic_inc(&qd_lru_count);
191 qd->qd_last_touched = jiffies; 234 spin_unlock(&qd_lru_lock);
192 spin_unlock(&sdp->sd_quota_spin); 235 }
193} 236}
194 237
195static int slot_get(struct gfs2_quota_data *qd) 238static int slot_get(struct gfs2_quota_data *qd)
@@ -198,10 +241,10 @@ static int slot_get(struct gfs2_quota_data *qd)
198 unsigned int c, o = 0, b; 241 unsigned int c, o = 0, b;
199 unsigned char byte = 0; 242 unsigned char byte = 0;
200 243
201 spin_lock(&sdp->sd_quota_spin); 244 spin_lock(&qd_lru_lock);
202 245
203 if (qd->qd_slot_count++) { 246 if (qd->qd_slot_count++) {
204 spin_unlock(&sdp->sd_quota_spin); 247 spin_unlock(&qd_lru_lock);
205 return 0; 248 return 0;
206 } 249 }
207 250
@@ -225,13 +268,13 @@ found:
225 268
226 sdp->sd_quota_bitmap[c][o] |= 1 << b; 269 sdp->sd_quota_bitmap[c][o] |= 1 << b;
227 270
228 spin_unlock(&sdp->sd_quota_spin); 271 spin_unlock(&qd_lru_lock);
229 272
230 return 0; 273 return 0;
231 274
232fail: 275fail:
233 qd->qd_slot_count--; 276 qd->qd_slot_count--;
234 spin_unlock(&sdp->sd_quota_spin); 277 spin_unlock(&qd_lru_lock);
235 return -ENOSPC; 278 return -ENOSPC;
236} 279}
237 280
@@ -239,23 +282,23 @@ static void slot_hold(struct gfs2_quota_data *qd)
239{ 282{
240 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 283 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
241 284
242 spin_lock(&sdp->sd_quota_spin); 285 spin_lock(&qd_lru_lock);
243 gfs2_assert(sdp, qd->qd_slot_count); 286 gfs2_assert(sdp, qd->qd_slot_count);
244 qd->qd_slot_count++; 287 qd->qd_slot_count++;
245 spin_unlock(&sdp->sd_quota_spin); 288 spin_unlock(&qd_lru_lock);
246} 289}
247 290
248static void slot_put(struct gfs2_quota_data *qd) 291static void slot_put(struct gfs2_quota_data *qd)
249{ 292{
250 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 293 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
251 294
252 spin_lock(&sdp->sd_quota_spin); 295 spin_lock(&qd_lru_lock);
253 gfs2_assert(sdp, qd->qd_slot_count); 296 gfs2_assert(sdp, qd->qd_slot_count);
254 if (!--qd->qd_slot_count) { 297 if (!--qd->qd_slot_count) {
255 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); 298 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
256 qd->qd_slot = -1; 299 qd->qd_slot = -1;
257 } 300 }
258 spin_unlock(&sdp->sd_quota_spin); 301 spin_unlock(&qd_lru_lock);
259} 302}
260 303
261static int bh_get(struct gfs2_quota_data *qd) 304static int bh_get(struct gfs2_quota_data *qd)
@@ -330,7 +373,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
330 if (sdp->sd_vfs->s_flags & MS_RDONLY) 373 if (sdp->sd_vfs->s_flags & MS_RDONLY)
331 return 0; 374 return 0;
332 375
333 spin_lock(&sdp->sd_quota_spin); 376 spin_lock(&qd_lru_lock);
334 377
335 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 378 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
336 if (test_bit(QDF_LOCKED, &qd->qd_flags) || 379 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
@@ -341,8 +384,8 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
341 list_move_tail(&qd->qd_list, &sdp->sd_quota_list); 384 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
342 385
343 set_bit(QDF_LOCKED, &qd->qd_flags); 386 set_bit(QDF_LOCKED, &qd->qd_flags);
344 gfs2_assert_warn(sdp, qd->qd_count); 387 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
345 qd->qd_count++; 388 atomic_inc(&qd->qd_count);
346 qd->qd_change_sync = qd->qd_change; 389 qd->qd_change_sync = qd->qd_change;
347 gfs2_assert_warn(sdp, qd->qd_slot_count); 390 gfs2_assert_warn(sdp, qd->qd_slot_count);
348 qd->qd_slot_count++; 391 qd->qd_slot_count++;
@@ -354,7 +397,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
354 if (!found) 397 if (!found)
355 qd = NULL; 398 qd = NULL;
356 399
357 spin_unlock(&sdp->sd_quota_spin); 400 spin_unlock(&qd_lru_lock);
358 401
359 if (qd) { 402 if (qd) {
360 gfs2_assert_warn(sdp, qd->qd_change_sync); 403 gfs2_assert_warn(sdp, qd->qd_change_sync);
@@ -379,24 +422,24 @@ static int qd_trylock(struct gfs2_quota_data *qd)
379 if (sdp->sd_vfs->s_flags & MS_RDONLY) 422 if (sdp->sd_vfs->s_flags & MS_RDONLY)
380 return 0; 423 return 0;
381 424
382 spin_lock(&sdp->sd_quota_spin); 425 spin_lock(&qd_lru_lock);
383 426
384 if (test_bit(QDF_LOCKED, &qd->qd_flags) || 427 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
385 !test_bit(QDF_CHANGE, &qd->qd_flags)) { 428 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
386 spin_unlock(&sdp->sd_quota_spin); 429 spin_unlock(&qd_lru_lock);
387 return 0; 430 return 0;
388 } 431 }
389 432
390 list_move_tail(&qd->qd_list, &sdp->sd_quota_list); 433 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
391 434
392 set_bit(QDF_LOCKED, &qd->qd_flags); 435 set_bit(QDF_LOCKED, &qd->qd_flags);
393 gfs2_assert_warn(sdp, qd->qd_count); 436 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
394 qd->qd_count++; 437 atomic_inc(&qd->qd_count);
395 qd->qd_change_sync = qd->qd_change; 438 qd->qd_change_sync = qd->qd_change;
396 gfs2_assert_warn(sdp, qd->qd_slot_count); 439 gfs2_assert_warn(sdp, qd->qd_slot_count);
397 qd->qd_slot_count++; 440 qd->qd_slot_count++;
398 441
399 spin_unlock(&sdp->sd_quota_spin); 442 spin_unlock(&qd_lru_lock);
400 443
401 gfs2_assert_warn(sdp, qd->qd_change_sync); 444 gfs2_assert_warn(sdp, qd->qd_change_sync);
402 if (bh_get(qd)) { 445 if (bh_get(qd)) {
@@ -556,9 +599,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
556 x = be64_to_cpu(qc->qc_change) + change; 599 x = be64_to_cpu(qc->qc_change) + change;
557 qc->qc_change = cpu_to_be64(x); 600 qc->qc_change = cpu_to_be64(x);
558 601
559 spin_lock(&sdp->sd_quota_spin); 602 spin_lock(&qd_lru_lock);
560 qd->qd_change = x; 603 qd->qd_change = x;
561 spin_unlock(&sdp->sd_quota_spin); 604 spin_unlock(&qd_lru_lock);
562 605
563 if (!x) { 606 if (!x) {
564 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); 607 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
@@ -802,8 +845,8 @@ restart:
802 loff_t pos; 845 loff_t pos;
803 gfs2_glock_dq_uninit(q_gh); 846 gfs2_glock_dq_uninit(q_gh);
804 error = gfs2_glock_nq_init(qd->qd_gl, 847 error = gfs2_glock_nq_init(qd->qd_gl,
805 LM_ST_EXCLUSIVE, GL_NOCACHE, 848 LM_ST_EXCLUSIVE, GL_NOCACHE,
806 q_gh); 849 q_gh);
807 if (error) 850 if (error)
808 return error; 851 return error;
809 852
@@ -820,7 +863,6 @@ restart:
820 863
821 gfs2_glock_dq_uninit(&i_gh); 864 gfs2_glock_dq_uninit(&i_gh);
822 865
823
824 gfs2_quota_in(&q, buf); 866 gfs2_quota_in(&q, buf);
825 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 867 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
826 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); 868 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
@@ -890,9 +932,9 @@ static int need_sync(struct gfs2_quota_data *qd)
890 if (!qd->qd_qb.qb_limit) 932 if (!qd->qd_qb.qb_limit)
891 return 0; 933 return 0;
892 934
893 spin_lock(&sdp->sd_quota_spin); 935 spin_lock(&qd_lru_lock);
894 value = qd->qd_change; 936 value = qd->qd_change;
895 spin_unlock(&sdp->sd_quota_spin); 937 spin_unlock(&qd_lru_lock);
896 938
897 spin_lock(&gt->gt_spin); 939 spin_lock(&gt->gt_spin);
898 num = gt->gt_quota_scale_num; 940 num = gt->gt_quota_scale_num;
@@ -985,9 +1027,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
985 continue; 1027 continue;
986 1028
987 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1029 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
988 spin_lock(&sdp->sd_quota_spin); 1030 spin_lock(&qd_lru_lock);
989 value += qd->qd_change; 1031 value += qd->qd_change;
990 spin_unlock(&sdp->sd_quota_spin); 1032 spin_unlock(&qd_lru_lock);
991 1033
992 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1034 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
993 print_message(qd, "exceeded"); 1035 print_message(qd, "exceeded");
@@ -1171,13 +1213,12 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
1171 qd->qd_change = qc.qc_change; 1213 qd->qd_change = qc.qc_change;
1172 qd->qd_slot = slot; 1214 qd->qd_slot = slot;
1173 qd->qd_slot_count = 1; 1215 qd->qd_slot_count = 1;
1174 qd->qd_last_touched = jiffies;
1175 1216
1176 spin_lock(&sdp->sd_quota_spin); 1217 spin_lock(&qd_lru_lock);
1177 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); 1218 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
1178 list_add(&qd->qd_list, &sdp->sd_quota_list); 1219 list_add(&qd->qd_list, &sdp->sd_quota_list);
1179 atomic_inc(&sdp->sd_quota_count); 1220 atomic_inc(&sdp->sd_quota_count);
1180 spin_unlock(&sdp->sd_quota_spin); 1221 spin_unlock(&qd_lru_lock);
1181 1222
1182 found++; 1223 found++;
1183 } 1224 }
@@ -1197,73 +1238,48 @@ fail:
1197 return error; 1238 return error;
1198} 1239}
1199 1240
1200static void gfs2_quota_scan(struct gfs2_sbd *sdp)
1201{
1202 struct gfs2_quota_data *qd, *safe;
1203 LIST_HEAD(dead);
1204
1205 spin_lock(&sdp->sd_quota_spin);
1206 list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
1207 if (!qd->qd_count &&
1208 time_after_eq(jiffies, qd->qd_last_touched +
1209 gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
1210 list_move(&qd->qd_list, &dead);
1211 gfs2_assert_warn(sdp,
1212 atomic_read(&sdp->sd_quota_count) > 0);
1213 atomic_dec(&sdp->sd_quota_count);
1214 }
1215 }
1216 spin_unlock(&sdp->sd_quota_spin);
1217
1218 while (!list_empty(&dead)) {
1219 qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
1220 list_del(&qd->qd_list);
1221
1222 gfs2_assert_warn(sdp, !qd->qd_change);
1223 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1224 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1225
1226 gfs2_lvb_unhold(qd->qd_gl);
1227 kmem_cache_free(gfs2_quotad_cachep, qd);
1228 }
1229}
1230
1231void gfs2_quota_cleanup(struct gfs2_sbd *sdp) 1241void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
1232{ 1242{
1233 struct list_head *head = &sdp->sd_quota_list; 1243 struct list_head *head = &sdp->sd_quota_list;
1234 struct gfs2_quota_data *qd; 1244 struct gfs2_quota_data *qd;
1235 unsigned int x; 1245 unsigned int x;
1236 1246
1237 spin_lock(&sdp->sd_quota_spin); 1247 spin_lock(&qd_lru_lock);
1238 while (!list_empty(head)) { 1248 while (!list_empty(head)) {
1239 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); 1249 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
1240 1250
1241 if (qd->qd_count > 1 || 1251 if (atomic_read(&qd->qd_count) > 1 ||
1242 (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) { 1252 (atomic_read(&qd->qd_count) &&
1253 !test_bit(QDF_CHANGE, &qd->qd_flags))) {
1243 list_move(&qd->qd_list, head); 1254 list_move(&qd->qd_list, head);
1244 spin_unlock(&sdp->sd_quota_spin); 1255 spin_unlock(&qd_lru_lock);
1245 schedule(); 1256 schedule();
1246 spin_lock(&sdp->sd_quota_spin); 1257 spin_lock(&qd_lru_lock);
1247 continue; 1258 continue;
1248 } 1259 }
1249 1260
1250 list_del(&qd->qd_list); 1261 list_del(&qd->qd_list);
1262 /* Also remove if this qd exists in the reclaim list */
1263 if (!list_empty(&qd->qd_reclaim)) {
1264 list_del_init(&qd->qd_reclaim);
1265 atomic_dec(&qd_lru_count);
1266 }
1251 atomic_dec(&sdp->sd_quota_count); 1267 atomic_dec(&sdp->sd_quota_count);
1252 spin_unlock(&sdp->sd_quota_spin); 1268 spin_unlock(&qd_lru_lock);
1253 1269
1254 if (!qd->qd_count) { 1270 if (!atomic_read(&qd->qd_count)) {
1255 gfs2_assert_warn(sdp, !qd->qd_change); 1271 gfs2_assert_warn(sdp, !qd->qd_change);
1256 gfs2_assert_warn(sdp, !qd->qd_slot_count); 1272 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1257 } else 1273 } else
1258 gfs2_assert_warn(sdp, qd->qd_slot_count == 1); 1274 gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
1259 gfs2_assert_warn(sdp, !qd->qd_bh_count); 1275 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1260 1276
1261 gfs2_lvb_unhold(qd->qd_gl); 1277 gfs2_glock_put(qd->qd_gl);
1262 kmem_cache_free(gfs2_quotad_cachep, qd); 1278 kmem_cache_free(gfs2_quotad_cachep, qd);
1263 1279
1264 spin_lock(&sdp->sd_quota_spin); 1280 spin_lock(&qd_lru_lock);
1265 } 1281 }
1266 spin_unlock(&sdp->sd_quota_spin); 1282 spin_unlock(&qd_lru_lock);
1267 1283
1268 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); 1284 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
1269 1285
@@ -1341,9 +1357,6 @@ int gfs2_quotad(void *data)
1341 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, 1357 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
1342 &quotad_timeo, &tune->gt_quota_quantum); 1358 &quotad_timeo, &tune->gt_quota_quantum);
1343 1359
1344 /* FIXME: This should be turned into a shrinker */
1345 gfs2_quota_scan(sdp);
1346
1347 /* Check for & recover partially truncated inodes */ 1360 /* Check for & recover partially truncated inodes */
1348 quotad_check_trunc_list(sdp); 1361 quotad_check_trunc_list(sdp);
1349 1362
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index cec9032be97d..0fa5fa63d0e8 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -49,4 +49,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
49 return ret; 49 return ret;
50} 50}
51 51
52extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
53
52#endif /* __QUOTA_DOT_H__ */ 54#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index efd09c3d2b26..247e8f7d6b3d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17#include <linux/kthread.h> 16#include <linux/kthread.h>
18#include <linux/freezer.h> 17#include <linux/freezer.h>
19 18
@@ -427,20 +426,23 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
427} 426}
428 427
429 428
430static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, 429static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
431 unsigned int message) 430 unsigned int message)
432{ 431{
433 if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done) 432 char env_jid[20];
434 return; 433 char env_status[20];
435 434 char *envp[] = { env_jid, env_status, NULL };
436 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 435 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
437 sdp->sd_lockstruct.ls_ops->lm_recovery_done( 436 ls->ls_recover_jid_done = jid;
438 sdp->sd_lockstruct.ls_lockspace, jid, message); 437 ls->ls_recover_jid_status = message;
438 sprintf(env_jid, "JID=%d", jid);
439 sprintf(env_status, "RECOVERY=%s",
440 message == LM_RD_SUCCESS ? "Done" : "Failed");
441 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
439} 442}
440 443
441
442/** 444/**
443 * gfs2_recover_journal - recovery a given journal 445 * gfs2_recover_journal - recover a given journal
444 * @jd: the struct gfs2_jdesc describing the journal 446 * @jd: the struct gfs2_jdesc describing the journal
445 * 447 *
446 * Acquire the journal's lock, check to see if the journal is clean, and 448 * Acquire the journal's lock, check to see if the journal is clean, and
@@ -561,7 +563,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
561 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) 563 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
562 gfs2_glock_dq_uninit(&ji_gh); 564 gfs2_glock_dq_uninit(&ji_gh);
563 565
564 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); 566 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
565 567
566 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) 568 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
567 gfs2_glock_dq_uninit(&j_gh); 569 gfs2_glock_dq_uninit(&j_gh);
@@ -581,7 +583,7 @@ fail_gunlock_j:
581 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); 583 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
582 584
583fail: 585fail:
584 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); 586 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
585 return error; 587 return error;
586} 588}
587 589
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b01c635d925..f03d024038ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -13,8 +13,8 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17#include <linux/prefetch.h> 16#include <linux/prefetch.h>
17#include <linux/blkdev.h>
18 18
19#include "gfs2.h" 19#include "gfs2.h"
20#include "incore.h" 20#include "incore.h"
@@ -132,81 +132,90 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
132} 132}
133 133
134/** 134/**
135 * gfs2_bit_search
136 * @ptr: Pointer to bitmap data
137 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
138 * @state: The state we are searching for
139 *
140 * We xor the bitmap data with a patter which is the bitwise opposite
141 * of what we are looking for, this gives rise to a pattern of ones
142 * wherever there is a match. Since we have two bits per entry, we
143 * take this pattern, shift it down by one place and then and it with
144 * the original. All the even bit positions (0,2,4, etc) then represent
145 * successful matches, so we mask with 0x55555..... to remove the unwanted
146 * odd bit positions.
147 *
148 * This allows searching of a whole u64 at once (32 blocks) with a
149 * single test (on 64 bit arches).
150 */
151
152static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
153{
154 u64 tmp;
155 static const u64 search[] = {
156 [0] = 0xffffffffffffffffULL,
157 [1] = 0xaaaaaaaaaaaaaaaaULL,
158 [2] = 0x5555555555555555ULL,
159 [3] = 0x0000000000000000ULL,
160 };
161 tmp = le64_to_cpu(*ptr) ^ search[state];
162 tmp &= (tmp >> 1);
163 tmp &= mask;
164 return tmp;
165}
166
167/**
135 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 168 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
136 * a block in a given allocation state. 169 * a block in a given allocation state.
137 * @buffer: the buffer that holds the bitmaps 170 * @buffer: the buffer that holds the bitmaps
138 * @buflen: the length (in bytes) of the buffer 171 * @len: the length (in bytes) of the buffer
139 * @goal: start search at this block's bit-pair (within @buffer) 172 * @goal: start search at this block's bit-pair (within @buffer)
140 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for. 173 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
141 * 174 *
142 * Scope of @goal and returned block number is only within this bitmap buffer, 175 * Scope of @goal and returned block number is only within this bitmap buffer,
143 * not entire rgrp or filesystem. @buffer will be offset from the actual 176 * not entire rgrp or filesystem. @buffer will be offset from the actual
144 * beginning of a bitmap block buffer, skipping any header structures. 177 * beginning of a bitmap block buffer, skipping any header structures, but
178 * headers are always a multiple of 64 bits long so that the buffer is
179 * always aligned to a 64 bit boundary.
180 *
181 * The size of the buffer is in bytes, but is it assumed that it is
182 * always ok to to read a complete multiple of 64 bits at the end
183 * of the block in case the end is no aligned to a natural boundary.
145 * 184 *
146 * Return: the block number (bitmap buffer scope) that was found 185 * Return: the block number (bitmap buffer scope) that was found
147 */ 186 */
148 187
149static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal, 188static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
150 u8 old_state) 189 u32 goal, u8 state)
151{ 190{
152 const u8 *byte, *start, *end; 191 u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
153 int bit, startbit; 192 const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
154 u32 g1, g2, misaligned; 193 const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
155 unsigned long *plong; 194 u64 tmp;
156 unsigned long lskipval; 195 u64 mask = 0x5555555555555555ULL;
157 196 u32 bit;
158 lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55; 197
159 g1 = (goal / GFS2_NBBY); 198 BUG_ON(state > 3);
160 start = buffer + g1; 199
161 byte = start; 200 /* Mask off bits we don't care about at the start of the search */
162 end = buffer + buflen; 201 mask <<= spoint;
163 g2 = ALIGN(g1, sizeof(unsigned long)); 202 tmp = gfs2_bit_search(ptr, mask, state);
164 plong = (unsigned long *)(buffer + g2); 203 ptr++;
165 startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; 204 while(tmp == 0 && ptr < end) {
166 misaligned = g2 - g1; 205 tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
167 if (!misaligned) 206 ptr++;
168 goto ulong_aligned;
169/* parse the bitmap a byte at a time */
170misaligned:
171 while (byte < end) {
172 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
173 return goal +
174 (((byte - start) * GFS2_NBBY) +
175 ((bit - startbit) >> 1));
176 }
177 bit += GFS2_BIT_SIZE;
178 if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
179 bit = 0;
180 byte++;
181 misaligned--;
182 if (!misaligned) {
183 plong = (unsigned long *)byte;
184 goto ulong_aligned;
185 }
186 }
187 }
188 return BFITNOENT;
189
190/* parse the bitmap a unsigned long at a time */
191ulong_aligned:
192 /* Stop at "end - 1" or else prefetch can go past the end and segfault.
193 We could "if" it but we'd lose some of the performance gained.
194 This way will only slow down searching the very last 4/8 bytes
195 depending on architecture. I've experimented with several ways
196 of writing this section such as using an else before the goto
197 but this one seems to be the fastest. */
198 while ((unsigned char *)plong < end - sizeof(unsigned long)) {
199 prefetch(plong + 1);
200 if (((*plong) & LBITMASK) != lskipval)
201 break;
202 plong++;
203 }
204 if ((unsigned char *)plong < end) {
205 byte = (const u8 *)plong;
206 misaligned += sizeof(unsigned long) - 1;
207 goto misaligned;
208 } 207 }
209 return BFITNOENT; 208 /* Mask off any bits which are more than len bytes from the start */
209 if (ptr == end && (len & (sizeof(u64) - 1)))
210 tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
211 /* Didn't find anything, so return */
212 if (tmp == 0)
213 return BFITNOENT;
214 ptr--;
215 bit = fls64(tmp);
216 bit--; /* fls64 always adds one to the bit count */
217 bit /= 2; /* two bits per entry in the bitmap */
218 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
210} 219}
211 220
212/** 221/**
@@ -831,6 +840,58 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
831 spin_unlock(&sdp->sd_rindex_spin); 840 spin_unlock(&sdp->sd_rindex_spin);
832} 841}
833 842
843static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
844 const struct gfs2_bitmap *bi)
845{
846 struct super_block *sb = sdp->sd_vfs;
847 struct block_device *bdev = sb->s_bdev;
848 const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
849 bdev_hardsect_size(sb->s_bdev);
850 u64 blk;
851 sector_t start = 0;
852 sector_t nr_sects = 0;
853 int rv;
854 unsigned int x;
855
856 for (x = 0; x < bi->bi_len; x++) {
857 const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
858 const u8 *clone = bi->bi_clone + bi->bi_offset + x;
859 u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
860 diff &= 0x55;
861 if (diff == 0)
862 continue;
863 blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
864 blk *= sects_per_blk; /* convert to sectors */
865 while(diff) {
866 if (diff & 1) {
867 if (nr_sects == 0)
868 goto start_new_extent;
869 if ((start + nr_sects) != blk) {
870 rv = blkdev_issue_discard(bdev, start,
871 nr_sects, GFP_NOFS);
872 if (rv)
873 goto fail;
874 nr_sects = 0;
875start_new_extent:
876 start = blk;
877 }
878 nr_sects += sects_per_blk;
879 }
880 diff >>= 2;
881 blk += sects_per_blk;
882 }
883 }
884 if (nr_sects) {
885 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS);
886 if (rv)
887 goto fail;
888 }
889 return;
890fail:
891 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
892 sdp->sd_args.ar_discard = 0;
893}
894
834void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) 895void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
835{ 896{
836 struct gfs2_sbd *sdp = rgd->rd_sbd; 897 struct gfs2_sbd *sdp = rgd->rd_sbd;
@@ -841,6 +902,8 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
841 struct gfs2_bitmap *bi = rgd->rd_bits + x; 902 struct gfs2_bitmap *bi = rgd->rd_bits + x;
842 if (!bi->bi_clone) 903 if (!bi->bi_clone)
843 continue; 904 continue;
905 if (sdp->sd_args.ar_discard)
906 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
844 memcpy(bi->bi_clone + bi->bi_offset, 907 memcpy(bi->bi_clone + bi->bi_offset,
845 bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); 908 bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
846 } 909 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 141b781f2fcc..601913e0a482 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -15,7 +15,6 @@
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/bio.h> 17#include <linux/bio.h>
18#include <linux/lm_interface.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
21#include "incore.h" 20#include "incore.h"
@@ -339,7 +338,6 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
339 struct gfs2_holder *t_gh) 338 struct gfs2_holder *t_gh)
340{ 339{
341 struct gfs2_inode *ip; 340 struct gfs2_inode *ip;
342 struct gfs2_holder ji_gh;
343 struct gfs2_jdesc *jd; 341 struct gfs2_jdesc *jd;
344 struct lfcc *lfcc; 342 struct lfcc *lfcc;
345 LIST_HEAD(list); 343 LIST_HEAD(list);
@@ -387,7 +385,6 @@ out:
387 gfs2_glock_dq_uninit(&lfcc->gh); 385 gfs2_glock_dq_uninit(&lfcc->gh);
388 kfree(lfcc); 386 kfree(lfcc);
389 } 387 }
390 gfs2_glock_dq_uninit(&ji_gh);
391 return error; 388 return error;
392} 389}
393 390
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index f6b8b00ad881..91abdbedcc86 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -14,7 +14,7 @@
14#include <linux/dcache.h> 14#include <linux/dcache.h>
15#include "incore.h" 15#include "incore.h"
16 16
17void gfs2_lm_unmount(struct gfs2_sbd *sdp); 17extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
18 18
19static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) 19static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
20{ 20{
@@ -27,21 +27,23 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
27 27
28void gfs2_jindex_free(struct gfs2_sbd *sdp); 28void gfs2_jindex_free(struct gfs2_sbd *sdp);
29 29
30struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); 30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data);
31int gfs2_jdesc_check(struct gfs2_jdesc *jd);
32 31
33int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, 32extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
34 struct gfs2_inode **ipp); 33extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
35 34
36int gfs2_make_fs_rw(struct gfs2_sbd *sdp); 35extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
36 struct gfs2_inode **ipp);
37 37
38int gfs2_statfs_init(struct gfs2_sbd *sdp); 38extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
39void gfs2_statfs_change(struct gfs2_sbd *sdp,
40 s64 total, s64 free, s64 dinodes);
41int gfs2_statfs_sync(struct gfs2_sbd *sdp);
42 39
43int gfs2_freeze_fs(struct gfs2_sbd *sdp); 40extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
44void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); 41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
42 s64 dinodes);
43extern int gfs2_statfs_sync(struct gfs2_sbd *sdp);
44
45extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
46extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
45 47
46extern struct file_system_type gfs2_fs_type; 48extern struct file_system_type gfs2_fs_type;
47extern struct file_system_type gfs2meta_fs_type; 49extern struct file_system_type gfs2meta_fs_type;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 26c1fa777a95..7655f5025fec 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -14,9 +14,8 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <linux/gfs2_ondisk.h>
20 19
21#include "gfs2.h" 20#include "gfs2.h"
22#include "incore.h" 21#include "incore.h"
@@ -25,6 +24,7 @@
25#include "glock.h" 24#include "glock.h"
26#include "quota.h" 25#include "quota.h"
27#include "util.h" 26#include "util.h"
27#include "glops.h"
28 28
29static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) 29static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
30{ 30{
@@ -37,6 +37,30 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
37 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); 37 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
38} 38}
39 39
40static int gfs2_uuid_valid(const u8 *uuid)
41{
42 int i;
43
44 for (i = 0; i < 16; i++) {
45 if (uuid[i])
46 return 1;
47 }
48 return 0;
49}
50
51static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
52{
53 const u8 *uuid = sdp->sd_sb.sb_uuid;
54 buf[0] = '\0';
55 if (!gfs2_uuid_valid(uuid))
56 return 0;
57 return snprintf(buf, PAGE_SIZE, "%02X%02X%02X%02X-%02X%02X-"
58 "%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
59 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5],
60 uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11],
61 uuid[12], uuid[13], uuid[14], uuid[15]);
62}
63
40static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 64static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
41{ 65{
42 unsigned int count; 66 unsigned int count;
@@ -148,6 +172,46 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
148 return len; 172 return len;
149} 173}
150 174
175static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
176{
177 struct gfs2_glock *gl;
178 const struct gfs2_glock_operations *glops;
179 unsigned int glmode;
180 unsigned int gltype;
181 unsigned long long glnum;
182 char mode[16];
183 int rv;
184
185 if (!capable(CAP_SYS_ADMIN))
186 return -EACCES;
187
188 rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
189 mode);
190 if (rv != 3)
191 return -EINVAL;
192
193 if (strcmp(mode, "EX") == 0)
194 glmode = LM_ST_UNLOCKED;
195 else if ((strcmp(mode, "CW") == 0) || (strcmp(mode, "DF") == 0))
196 glmode = LM_ST_DEFERRED;
197 else if ((strcmp(mode, "PR") == 0) || (strcmp(mode, "SH") == 0))
198 glmode = LM_ST_SHARED;
199 else
200 return -EINVAL;
201
202 if (gltype > LM_TYPE_JOURNAL)
203 return -EINVAL;
204 glops = gfs2_glops_list[gltype];
205 if (glops == NULL)
206 return -EINVAL;
207 rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
208 if (rv)
209 return rv;
210 gfs2_glock_cb(gl, glmode);
211 gfs2_glock_put(gl);
212 return len;
213}
214
151struct gfs2_attr { 215struct gfs2_attr {
152 struct attribute attr; 216 struct attribute attr;
153 ssize_t (*show)(struct gfs2_sbd *, char *); 217 ssize_t (*show)(struct gfs2_sbd *, char *);
@@ -159,22 +223,26 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
159 223
160GFS2_ATTR(id, 0444, id_show, NULL); 224GFS2_ATTR(id, 0444, id_show, NULL);
161GFS2_ATTR(fsname, 0444, fsname_show, NULL); 225GFS2_ATTR(fsname, 0444, fsname_show, NULL);
226GFS2_ATTR(uuid, 0444, uuid_show, NULL);
162GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); 227GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
163GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 228GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
164GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); 229GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
165GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); 230GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
166GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store); 231GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
167GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store); 232GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
233GFS2_ATTR(demote_rq, 0200, NULL, demote_rq_store);
168 234
169static struct attribute *gfs2_attrs[] = { 235static struct attribute *gfs2_attrs[] = {
170 &gfs2_attr_id.attr, 236 &gfs2_attr_id.attr,
171 &gfs2_attr_fsname.attr, 237 &gfs2_attr_fsname.attr,
238 &gfs2_attr_uuid.attr,
172 &gfs2_attr_freeze.attr, 239 &gfs2_attr_freeze.attr,
173 &gfs2_attr_withdraw.attr, 240 &gfs2_attr_withdraw.attr,
174 &gfs2_attr_statfs_sync.attr, 241 &gfs2_attr_statfs_sync.attr,
175 &gfs2_attr_quota_sync.attr, 242 &gfs2_attr_quota_sync.attr,
176 &gfs2_attr_quota_refresh_user.attr, 243 &gfs2_attr_quota_refresh_user.attr,
177 &gfs2_attr_quota_refresh_group.attr, 244 &gfs2_attr_quota_refresh_group.attr,
245 &gfs2_attr_demote_rq.attr,
178 NULL, 246 NULL,
179}; 247};
180 248
@@ -224,14 +292,145 @@ static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
224 292
225LOCKSTRUCT_ATTR(jid, "%u\n"); 293LOCKSTRUCT_ATTR(jid, "%u\n");
226LOCKSTRUCT_ATTR(first, "%u\n"); 294LOCKSTRUCT_ATTR(first, "%u\n");
227LOCKSTRUCT_ATTR(lvb_size, "%u\n");
228LOCKSTRUCT_ATTR(flags, "%d\n");
229 295
230static struct attribute *lockstruct_attrs[] = { 296static struct attribute *lockstruct_attrs[] = {
231 &lockstruct_attr_jid.attr, 297 &lockstruct_attr_jid.attr,
232 &lockstruct_attr_first.attr, 298 &lockstruct_attr_first.attr,
233 &lockstruct_attr_lvb_size.attr, 299 NULL,
234 &lockstruct_attr_flags.attr, 300};
301
302/*
303 * lock_module. Originally from lock_dlm
304 */
305
306static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf)
307{
308 const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops;
309 return sprintf(buf, "%s\n", ops->lm_proto_name);
310}
311
312static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
313{
314 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
315 ssize_t ret;
316 int val = 0;
317
318 if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
319 val = 1;
320 ret = sprintf(buf, "%d\n", val);
321 return ret;
322}
323
324static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
325{
326 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
327 ssize_t ret = len;
328 int val;
329
330 val = simple_strtol(buf, NULL, 0);
331
332 if (val == 1)
333 set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
334 else if (val == 0) {
335 clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
336 smp_mb__after_clear_bit();
337 gfs2_glock_thaw(sdp);
338 } else {
339 ret = -EINVAL;
340 }
341 return ret;
342}
343
344static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf)
345{
346 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
347 return sprintf(buf, "%u\n", ls->ls_id);
348}
349
350static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
351{
352 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
353 return sprintf(buf, "%d\n", ls->ls_first);
354}
355
356static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
357{
358 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
359 return sprintf(buf, "%d\n", ls->ls_first_done);
360}
361
362static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf)
363{
364 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
365 return sprintf(buf, "%d\n", ls->ls_recover_jid);
366}
367
368static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
369{
370 struct gfs2_jdesc *jd;
371
372 spin_lock(&sdp->sd_jindex_spin);
373 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
374 if (jd->jd_jid != jid)
375 continue;
376 jd->jd_dirty = 1;
377 break;
378 }
379 spin_unlock(&sdp->sd_jindex_spin);
380}
381
382static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
383{
384 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
385 ls->ls_recover_jid = simple_strtol(buf, NULL, 0);
386 gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid);
387 if (sdp->sd_recoverd_process)
388 wake_up_process(sdp->sd_recoverd_process);
389 return len;
390}
391
392static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
393{
394 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
395 return sprintf(buf, "%d\n", ls->ls_recover_jid_done);
396}
397
398static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
399{
400 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
401 return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
402}
403
404struct gdlm_attr {
405 struct attribute attr;
406 ssize_t (*show)(struct gfs2_sbd *sdp, char *);
407 ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t);
408};
409
410#define GDLM_ATTR(_name,_mode,_show,_store) \
411static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
412
413GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
414GDLM_ATTR(block, 0644, block_show, block_store);
415GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
416GDLM_ATTR(id, 0444, lkid_show, NULL);
417GDLM_ATTR(first, 0444, lkfirst_show, NULL);
418GDLM_ATTR(first_done, 0444, first_done_show, NULL);
419GDLM_ATTR(recover, 0644, recover_show, recover_store);
420GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
421GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
422
423static struct attribute *lock_module_attrs[] = {
424 &gdlm_attr_proto_name.attr,
425 &gdlm_attr_block.attr,
426 &gdlm_attr_withdraw.attr,
427 &gdlm_attr_id.attr,
428 &lockstruct_attr_jid.attr,
429 &gdlm_attr_first.attr,
430 &gdlm_attr_first_done.attr,
431 &gdlm_attr_recover.attr,
432 &gdlm_attr_recover_done.attr,
433 &gdlm_attr_recover_status.attr,
235 NULL, 434 NULL,
236}; 435};
237 436
@@ -373,7 +572,6 @@ TUNE_ATTR(complain_secs, 0);
373TUNE_ATTR(statfs_slow, 0); 572TUNE_ATTR(statfs_slow, 0);
374TUNE_ATTR(new_files_jdata, 0); 573TUNE_ATTR(new_files_jdata, 0);
375TUNE_ATTR(quota_simul_sync, 1); 574TUNE_ATTR(quota_simul_sync, 1);
376TUNE_ATTR(quota_cache_secs, 1);
377TUNE_ATTR(stall_secs, 1); 575TUNE_ATTR(stall_secs, 1);
378TUNE_ATTR(statfs_quantum, 1); 576TUNE_ATTR(statfs_quantum, 1);
379TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); 577TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -389,7 +587,6 @@ static struct attribute *tune_attrs[] = {
389 &tune_attr_complain_secs.attr, 587 &tune_attr_complain_secs.attr,
390 &tune_attr_statfs_slow.attr, 588 &tune_attr_statfs_slow.attr,
391 &tune_attr_quota_simul_sync.attr, 589 &tune_attr_quota_simul_sync.attr,
392 &tune_attr_quota_cache_secs.attr,
393 &tune_attr_stall_secs.attr, 590 &tune_attr_stall_secs.attr,
394 &tune_attr_statfs_quantum.attr, 591 &tune_attr_statfs_quantum.attr,
395 &tune_attr_recoverd_secs.attr, 592 &tune_attr_recoverd_secs.attr,
@@ -414,6 +611,11 @@ static struct attribute_group tune_group = {
414 .attrs = tune_attrs, 611 .attrs = tune_attrs,
415}; 612};
416 613
614static struct attribute_group lock_module_group = {
615 .name = "lock_module",
616 .attrs = lock_module_attrs,
617};
618
417int gfs2_sys_fs_add(struct gfs2_sbd *sdp) 619int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
418{ 620{
419 int error; 621 int error;
@@ -436,9 +638,15 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
436 if (error) 638 if (error)
437 goto fail_args; 639 goto fail_args;
438 640
641 error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
642 if (error)
643 goto fail_tune;
644
439 kobject_uevent(&sdp->sd_kobj, KOBJ_ADD); 645 kobject_uevent(&sdp->sd_kobj, KOBJ_ADD);
440 return 0; 646 return 0;
441 647
648fail_tune:
649 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
442fail_args: 650fail_args:
443 sysfs_remove_group(&sdp->sd_kobj, &args_group); 651 sysfs_remove_group(&sdp->sd_kobj, &args_group);
444fail_lockstruct: 652fail_lockstruct:
@@ -455,15 +663,27 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
455 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 663 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
456 sysfs_remove_group(&sdp->sd_kobj, &args_group); 664 sysfs_remove_group(&sdp->sd_kobj, &args_group);
457 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); 665 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
666 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
458 kobject_put(&sdp->sd_kobj); 667 kobject_put(&sdp->sd_kobj);
459} 668}
460 669
670
461static int gfs2_uevent(struct kset *kset, struct kobject *kobj, 671static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
462 struct kobj_uevent_env *env) 672 struct kobj_uevent_env *env)
463{ 673{
464 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); 674 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
675 const u8 *uuid = sdp->sd_sb.sb_uuid;
676
465 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 677 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
466 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 678 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
679 if (gfs2_uuid_valid(uuid)) {
680 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-"
681 "%02X%02X-%02X%02X%02X%02X%02X%02X",
682 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4],
683 uuid[5], uuid[6], uuid[7], uuid[8], uuid[9],
684 uuid[10], uuid[11], uuid[12], uuid[13],
685 uuid[14], uuid[15]);
686 }
467 return 0; 687 return 0;
468} 688}
469 689
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f677b8a83f0c..053752d4b27f 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -12,9 +12,8 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
17#include <linux/lm_interface.h> 16#include <linux/gfs2_ondisk.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
20#include "incore.h" 19#include "incore.h"
@@ -88,9 +87,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
88 87
89 if (!tr->tr_touched) { 88 if (!tr->tr_touched) {
90 gfs2_log_release(sdp, tr->tr_reserved); 89 gfs2_log_release(sdp, tr->tr_reserved);
91 gfs2_glock_dq(&tr->tr_t_gh); 90 if (tr->tr_t_gh.gh_gl) {
92 gfs2_holder_uninit(&tr->tr_t_gh); 91 gfs2_glock_dq(&tr->tr_t_gh);
93 kfree(tr); 92 gfs2_holder_uninit(&tr->tr_t_gh);
93 kfree(tr);
94 }
94 return; 95 return;
95 } 96 }
96 97
@@ -106,9 +107,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
106 } 107 }
107 108
108 gfs2_log_commit(sdp, tr); 109 gfs2_log_commit(sdp, tr);
109 gfs2_glock_dq(&tr->tr_t_gh); 110 if (tr->tr_t_gh.gh_gl) {
110 gfs2_holder_uninit(&tr->tr_t_gh); 111 gfs2_glock_dq(&tr->tr_t_gh);
111 kfree(tr); 112 gfs2_holder_uninit(&tr->tr_t_gh);
113 kfree(tr);
114 }
112 115
113 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) 116 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
114 gfs2_log_flush(sdp, NULL); 117 gfs2_log_flush(sdp, NULL);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 374f50e95496..9d12b1118ba0 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/crc32.h> 14#include <linux/crc32.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17#include <asm/uaccess.h> 16#include <asm/uaccess.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
@@ -35,6 +34,8 @@ void gfs2_assert_i(struct gfs2_sbd *sdp)
35 34
36int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) 35int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
37{ 36{
37 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
38 const struct lm_lockops *lm = ls->ls_ops;
38 va_list args; 39 va_list args;
39 40
40 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 41 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
@@ -47,8 +48,12 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
47 fs_err(sdp, "about to withdraw this file system\n"); 48 fs_err(sdp, "about to withdraw this file system\n");
48 BUG_ON(sdp->sd_args.ar_debug); 49 BUG_ON(sdp->sd_args.ar_debug);
49 50
50 fs_err(sdp, "telling LM to withdraw\n"); 51 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
51 gfs2_withdraw_lockproto(&sdp->sd_lockstruct); 52
53 if (lm->lm_unmount) {
54 fs_err(sdp, "telling LM to unmount\n");
55 lm->lm_unmount(sdp);
56 }
52 fs_err(sdp, "withdrawn\n"); 57 fs_err(sdp, "withdrawn\n");
53 dump_stack(); 58 dump_stack();
54 59
diff --git a/fs/inode.c b/fs/inode.c
index 913ab2d9a5d1..643ac43e5a5c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
17#include <linux/hash.h> 17#include <linux/hash.h>
18#include <linux/swap.h> 18#include <linux/swap.h>
19#include <linux/security.h> 19#include <linux/security.h>
20#include <linux/ima.h>
20#include <linux/pagemap.h> 21#include <linux/pagemap.h>
21#include <linux/cdev.h> 22#include <linux/cdev.h>
22#include <linux/bootmem.h> 23#include <linux/bootmem.h>
@@ -147,13 +148,13 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
147 inode->i_cdev = NULL; 148 inode->i_cdev = NULL;
148 inode->i_rdev = 0; 149 inode->i_rdev = 0;
149 inode->dirtied_when = 0; 150 inode->dirtied_when = 0;
150 if (security_inode_alloc(inode)) { 151
151 if (inode->i_sb->s_op->destroy_inode) 152 if (security_inode_alloc(inode))
152 inode->i_sb->s_op->destroy_inode(inode); 153 goto out_free_inode;
153 else 154
154 kmem_cache_free(inode_cachep, (inode)); 155 /* allocate and initialize an i_integrity */
155 return NULL; 156 if (ima_inode_alloc(inode))
156 } 157 goto out_free_security;
157 158
158 spin_lock_init(&inode->i_lock); 159 spin_lock_init(&inode->i_lock);
159 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 160 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
@@ -189,6 +190,15 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
189 inode->i_mapping = mapping; 190 inode->i_mapping = mapping;
190 191
191 return inode; 192 return inode;
193
194out_free_security:
195 security_inode_free(inode);
196out_free_inode:
197 if (inode->i_sb->s_op->destroy_inode)
198 inode->i_sb->s_op->destroy_inode(inode);
199 else
200 kmem_cache_free(inode_cachep, (inode));
201 return NULL;
192} 202}
193EXPORT_SYMBOL(inode_init_always); 203EXPORT_SYMBOL(inode_init_always);
194 204
@@ -359,6 +369,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
359 invalidate_inode_buffers(inode); 369 invalidate_inode_buffers(inode);
360 if (!atomic_read(&inode->i_count)) { 370 if (!atomic_read(&inode->i_count)) {
361 list_move(&inode->i_list, dispose); 371 list_move(&inode->i_list, dispose);
372 WARN_ON(inode->i_state & I_NEW);
362 inode->i_state |= I_FREEING; 373 inode->i_state |= I_FREEING;
363 count++; 374 count++;
364 continue; 375 continue;
@@ -460,6 +471,7 @@ static void prune_icache(int nr_to_scan)
460 continue; 471 continue;
461 } 472 }
462 list_move(&inode->i_list, &freeable); 473 list_move(&inode->i_list, &freeable);
474 WARN_ON(inode->i_state & I_NEW);
463 inode->i_state |= I_FREEING; 475 inode->i_state |= I_FREEING;
464 nr_pruned++; 476 nr_pruned++;
465 } 477 }
@@ -656,6 +668,7 @@ void unlock_new_inode(struct inode *inode)
656 * just created it (so there can be no old holders 668 * just created it (so there can be no old holders
657 * that haven't tested I_LOCK). 669 * that haven't tested I_LOCK).
658 */ 670 */
671 WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
659 inode->i_state &= ~(I_LOCK|I_NEW); 672 inode->i_state &= ~(I_LOCK|I_NEW);
660 wake_up_inode(inode); 673 wake_up_inode(inode);
661} 674}
@@ -1145,6 +1158,7 @@ void generic_delete_inode(struct inode *inode)
1145 1158
1146 list_del_init(&inode->i_list); 1159 list_del_init(&inode->i_list);
1147 list_del_init(&inode->i_sb_list); 1160 list_del_init(&inode->i_sb_list);
1161 WARN_ON(inode->i_state & I_NEW);
1148 inode->i_state |= I_FREEING; 1162 inode->i_state |= I_FREEING;
1149 inodes_stat.nr_inodes--; 1163 inodes_stat.nr_inodes--;
1150 spin_unlock(&inode_lock); 1164 spin_unlock(&inode_lock);
@@ -1186,16 +1200,19 @@ static void generic_forget_inode(struct inode *inode)
1186 spin_unlock(&inode_lock); 1200 spin_unlock(&inode_lock);
1187 return; 1201 return;
1188 } 1202 }
1203 WARN_ON(inode->i_state & I_NEW);
1189 inode->i_state |= I_WILL_FREE; 1204 inode->i_state |= I_WILL_FREE;
1190 spin_unlock(&inode_lock); 1205 spin_unlock(&inode_lock);
1191 write_inode_now(inode, 1); 1206 write_inode_now(inode, 1);
1192 spin_lock(&inode_lock); 1207 spin_lock(&inode_lock);
1208 WARN_ON(inode->i_state & I_NEW);
1193 inode->i_state &= ~I_WILL_FREE; 1209 inode->i_state &= ~I_WILL_FREE;
1194 inodes_stat.nr_unused--; 1210 inodes_stat.nr_unused--;
1195 hlist_del_init(&inode->i_hash); 1211 hlist_del_init(&inode->i_hash);
1196 } 1212 }
1197 list_del_init(&inode->i_list); 1213 list_del_init(&inode->i_list);
1198 list_del_init(&inode->i_sb_list); 1214 list_del_init(&inode->i_sb_list);
1215 WARN_ON(inode->i_state & I_NEW);
1199 inode->i_state |= I_FREEING; 1216 inode->i_state |= I_FREEING;
1200 inodes_stat.nr_inodes--; 1217 inodes_stat.nr_inodes--;
1201 spin_unlock(&inode_lock); 1218 spin_unlock(&inode_lock);
@@ -1283,6 +1300,40 @@ sector_t bmap(struct inode * inode, sector_t block)
1283} 1300}
1284EXPORT_SYMBOL(bmap); 1301EXPORT_SYMBOL(bmap);
1285 1302
1303/*
1304 * With relative atime, only update atime if the previous atime is
1305 * earlier than either the ctime or mtime or if at least a day has
1306 * passed since the last atime update.
1307 */
1308static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1309 struct timespec now)
1310{
1311
1312 if (!(mnt->mnt_flags & MNT_RELATIME))
1313 return 1;
1314 /*
1315 * Is mtime younger than atime? If yes, update atime:
1316 */
1317 if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
1318 return 1;
1319 /*
1320 * Is ctime younger than atime? If yes, update atime:
1321 */
1322 if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
1323 return 1;
1324
1325 /*
1326 * Is the previous atime value older than a day? If yes,
1327 * update atime:
1328 */
1329 if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
1330 return 1;
1331 /*
1332 * Good, we can skip the atime update:
1333 */
1334 return 0;
1335}
1336
1286/** 1337/**
1287 * touch_atime - update the access time 1338 * touch_atime - update the access time
1288 * @mnt: mount the inode is accessed on 1339 * @mnt: mount the inode is accessed on
@@ -1310,17 +1361,12 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1310 goto out; 1361 goto out;
1311 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1362 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1312 goto out; 1363 goto out;
1313 if (mnt->mnt_flags & MNT_RELATIME) {
1314 /*
1315 * With relative atime, only update atime if the previous
1316 * atime is earlier than either the ctime or mtime.
1317 */
1318 if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
1319 timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
1320 goto out;
1321 }
1322 1364
1323 now = current_fs_time(inode->i_sb); 1365 now = current_fs_time(inode->i_sb);
1366
1367 if (!relatime_need_update(mnt, inode, now))
1368 goto out;
1369
1324 if (timespec_equal(&inode->i_atime, &now)) 1370 if (timespec_equal(&inode->i_atime, &now))
1325 goto out; 1371 goto out;
1326 1372
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 1f3b0fc0d351..aedc47a264c1 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -139,6 +139,55 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
139 return 0; 139 return 0;
140} 140}
141 141
142#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
143static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap,
144 struct in6_addr *addr_mapped)
145{
146 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
147
148 switch (sap->sa_family) {
149 case AF_INET6:
150 return &((const struct sockaddr_in6 *)sap)->sin6_addr;
151 case AF_INET:
152 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, addr_mapped);
153 return addr_mapped;
154 }
155
156 return NULL;
157}
158
159/*
160 * If lockd is using a PF_INET6 listener, all incoming requests appear
161 * to come from AF_INET6 remotes. The address of AF_INET remotes are
162 * mapped to AF_INET6 automatically by the network layer. In case the
163 * user passed an AF_INET server address at mount time, ensure both
164 * addresses are AF_INET6 before comparing them.
165 */
166static int nlmclnt_cmp_addr(const struct nlm_host *host,
167 const struct sockaddr *sap)
168{
169 const struct in6_addr *addr1;
170 const struct in6_addr *addr2;
171 struct in6_addr addr1_mapped;
172 struct in6_addr addr2_mapped;
173
174 addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped);
175 if (likely(addr1 != NULL)) {
176 addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped);
177 if (likely(addr2 != NULL))
178 return ipv6_addr_equal(addr1, addr2);
179 }
180
181 return 0;
182}
183#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
184static int nlmclnt_cmp_addr(const struct nlm_host *host,
185 const struct sockaddr *sap)
186{
187 return nlm_cmp_addr(nlm_addr(host), sap);
188}
189#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
190
142/* 191/*
143 * The server lockd has called us back to tell us the lock was granted 192 * The server lockd has called us back to tell us the lock was granted
144 */ 193 */
@@ -166,7 +215,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
166 */ 215 */
167 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) 216 if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
168 continue; 217 continue;
169 if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) 218 if (!nlmclnt_cmp_addr(block->b_host, addr))
170 continue; 219 continue;
171 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 220 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
172 continue; 221 continue;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d1d1eb84679d..618865b3128b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds 4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * 5 *
6 * Copyright (C) 1996 Gertjan van Wingerde (gertjan@cs.vu.nl) 6 * Copyright (C) 1996 Gertjan van Wingerde
7 * Minix V2 fs support. 7 * Minix V2 fs support.
8 * 8 *
9 * Modified for 680x0 by Andreas Schwab 9 * Modified for 680x0 by Andreas Schwab
diff --git a/fs/namei.c b/fs/namei.c
index bbc15c237558..199317642ad6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -24,6 +24,7 @@
24#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
25#include <linux/personality.h> 25#include <linux/personality.h>
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/ima.h>
27#include <linux/syscalls.h> 28#include <linux/syscalls.h>
28#include <linux/mount.h> 29#include <linux/mount.h>
29#include <linux/audit.h> 30#include <linux/audit.h>
@@ -850,6 +851,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
850 if (err == -EAGAIN) 851 if (err == -EAGAIN)
851 err = inode_permission(nd->path.dentry->d_inode, 852 err = inode_permission(nd->path.dentry->d_inode,
852 MAY_EXEC); 853 MAY_EXEC);
854 if (!err)
855 err = ima_path_check(&nd->path, MAY_EXEC);
853 if (err) 856 if (err)
854 break; 857 break;
855 858
@@ -1509,6 +1512,11 @@ int may_open(struct path *path, int acc_mode, int flag)
1509 error = inode_permission(inode, acc_mode); 1512 error = inode_permission(inode, acc_mode);
1510 if (error) 1513 if (error)
1511 return error; 1514 return error;
1515
1516 error = ima_path_check(path,
1517 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
1518 if (error)
1519 return error;
1512 /* 1520 /*
1513 * An append-only file must be opened in append mode for writing. 1521 * An append-only file must be opened in append mode for writing.
1514 */ 1522 */
diff --git a/fs/namespace.c b/fs/namespace.c
index 06f8e63f6cb1..f0e753097353 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -780,6 +780,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
780 { MNT_NOATIME, ",noatime" }, 780 { MNT_NOATIME, ",noatime" },
781 { MNT_NODIRATIME, ",nodiratime" }, 781 { MNT_NODIRATIME, ",nodiratime" },
782 { MNT_RELATIME, ",relatime" }, 782 { MNT_RELATIME, ",relatime" },
783 { MNT_STRICTATIME, ",strictatime" },
783 { 0, NULL } 784 { 0, NULL }
784 }; 785 };
785 const struct proc_fs_info *fs_infop; 786 const struct proc_fs_info *fs_infop;
@@ -1919,6 +1920,9 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1919 if (data_page) 1920 if (data_page)
1920 ((char *)data_page)[PAGE_SIZE - 1] = 0; 1921 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1921 1922
1923 /* Default to relatime */
1924 mnt_flags |= MNT_RELATIME;
1925
1922 /* Separate the per-mountpoint flags */ 1926 /* Separate the per-mountpoint flags */
1923 if (flags & MS_NOSUID) 1927 if (flags & MS_NOSUID)
1924 mnt_flags |= MNT_NOSUID; 1928 mnt_flags |= MNT_NOSUID;
@@ -1930,13 +1934,14 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1930 mnt_flags |= MNT_NOATIME; 1934 mnt_flags |= MNT_NOATIME;
1931 if (flags & MS_NODIRATIME) 1935 if (flags & MS_NODIRATIME)
1932 mnt_flags |= MNT_NODIRATIME; 1936 mnt_flags |= MNT_NODIRATIME;
1933 if (flags & MS_RELATIME) 1937 if (flags & MS_STRICTATIME)
1934 mnt_flags |= MNT_RELATIME; 1938 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
1935 if (flags & MS_RDONLY) 1939 if (flags & MS_RDONLY)
1936 mnt_flags |= MNT_READONLY; 1940 mnt_flags |= MNT_READONLY;
1937 1941
1938 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1942 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1939 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); 1943 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1944 MS_STRICTATIME);
1940 1945
1941 /* ... and get the mountpoint */ 1946 /* ... and get the mountpoint */
1942 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); 1947 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9b728f3565a1..574158ae2398 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -255,6 +255,32 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
255 } 255 }
256 return 0; 256 return 0;
257} 257}
258
259/*
260 * Test if two ip6 socket addresses refer to the same socket by
261 * comparing relevant fields. The padding bytes specifically, are not
262 * compared. sin6_flowinfo is not compared because it only affects QoS
263 * and sin6_scope_id is only compared if the address is "link local"
264 * because "link local" addresses need only be unique to a specific
265 * link. Conversely, ordinary unicast addresses might have different
266 * sin6_scope_id.
267 *
268 * The caller should ensure both socket addresses are AF_INET6.
269 */
270static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
271 const struct sockaddr *sa2)
272{
273 const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1;
274 const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2;
275
276 if (!ipv6_addr_equal(&saddr1->sin6_addr,
277 &saddr1->sin6_addr))
278 return 0;
279 if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
280 saddr1->sin6_scope_id != saddr2->sin6_scope_id)
281 return 0;
282 return saddr1->sin6_port == saddr2->sin6_port;
283}
258#else 284#else
259static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1, 285static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
260 const struct sockaddr_in *sa2) 286 const struct sockaddr_in *sa2)
@@ -270,9 +296,52 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
270 return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1, 296 return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
271 (const struct sockaddr_in *)sa2); 297 (const struct sockaddr_in *)sa2);
272} 298}
299
/*
 * Stand-in used by the branch of the preprocessor conditional that
 * lacks the real ip6 comparison: it always reports "no match" (0).
 */
static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
				const struct sockaddr *sa2)
{
	return 0;
}
273#endif 305#endif
274 306
275/* 307/*
308 * Test if two ip4 socket addresses refer to the same socket, by
309 * comparing relevant fields. The padding bytes specifically, are
310 * not compared.
311 *
312 * The caller should ensure both socket addresses are AF_INET.
313 */
static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
				const struct sockaddr *sa2)
{
	const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
	const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;

	/* Same socket only when both the address and the port agree. */
	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr &&
	       sin1->sin_port == sin2->sin_port;
}
324
325/*
326 * Test if two socket addresses represent the same actual socket,
327 * by comparing (only) relevant fields.
328 */
329static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
330 const struct sockaddr *sa2)
331{
332 if (sa1->sa_family != sa2->sa_family)
333 return 0;
334
335 switch (sa1->sa_family) {
336 case AF_INET:
337 return nfs_sockaddr_cmp_ip4(sa1, sa2);
338 case AF_INET6:
339 return nfs_sockaddr_cmp_ip6(sa1, sa2);
340 }
341 return 0;
342}
343
344/*
276 * Find a client by IP address and protocol version 345 * Find a client by IP address and protocol version
277 * - returns NULL if no such client 346 * - returns NULL if no such client
278 */ 347 */
@@ -344,8 +413,10 @@ struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
344static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data) 413static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
345{ 414{
346 struct nfs_client *clp; 415 struct nfs_client *clp;
416 const struct sockaddr *sap = data->addr;
347 417
348 list_for_each_entry(clp, &nfs_client_list, cl_share_link) { 418 list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
419 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
349 /* Don't match clients that failed to initialise properly */ 420 /* Don't match clients that failed to initialise properly */
350 if (clp->cl_cons_state < 0) 421 if (clp->cl_cons_state < 0)
351 continue; 422 continue;
@@ -358,7 +429,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
358 continue; 429 continue;
359 430
360 /* Match the full socket address */ 431 /* Match the full socket address */
361 if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0) 432 if (!nfs_sockaddr_cmp(sap, clap))
362 continue; 433 continue;
363 434
364 atomic_inc(&clp->cl_count); 435 atomic_inc(&clp->cl_count);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e35c8199f82f..672368f865ca 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1892,8 +1892,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
1892 cache.cred = cred; 1892 cache.cred = cred;
1893 cache.jiffies = jiffies; 1893 cache.jiffies = jiffies;
1894 status = NFS_PROTO(inode)->access(inode, &cache); 1894 status = NFS_PROTO(inode)->access(inode, &cache);
1895 if (status != 0) 1895 if (status != 0) {
1896 if (status == -ESTALE) {
1897 nfs_zap_caches(inode);
1898 if (!S_ISDIR(inode->i_mode))
1899 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
1900 }
1896 return status; 1901 return status;
1902 }
1897 nfs_access_add_cache(inode, &cache); 1903 nfs_access_add_cache(inode, &cache);
1898out: 1904out:
1899 if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 1905 if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index cef62557c87d..6bbf0e6daad2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -292,7 +292,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
292{ 292{
293 struct nfs_server *server = NFS_SERVER(inode); 293 struct nfs_server *server = NFS_SERVER(inode);
294 struct nfs_fattr fattr; 294 struct nfs_fattr fattr;
295 struct page *pages[NFSACL_MAXPAGES] = { }; 295 struct page *pages[NFSACL_MAXPAGES];
296 struct nfs3_setaclargs args = { 296 struct nfs3_setaclargs args = {
297 .inode = inode, 297 .inode = inode,
298 .mask = NFS_ACL, 298 .mask = NFS_ACL,
@@ -303,7 +303,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
303 .rpc_argp = &args, 303 .rpc_argp = &args,
304 .rpc_resp = &fattr, 304 .rpc_resp = &fattr,
305 }; 305 };
306 int status, count; 306 int status;
307 307
308 status = -EOPNOTSUPP; 308 status = -EOPNOTSUPP;
309 if (!nfs_server_capable(inode, NFS_CAP_ACLS)) 309 if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -319,6 +319,20 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
319 if (S_ISDIR(inode->i_mode)) { 319 if (S_ISDIR(inode->i_mode)) {
320 args.mask |= NFS_DFACL; 320 args.mask |= NFS_DFACL;
321 args.acl_default = dfacl; 321 args.acl_default = dfacl;
322 args.len = nfsacl_size(acl, dfacl);
323 } else
324 args.len = nfsacl_size(acl, NULL);
325
326 if (args.len > NFS_ACL_INLINE_BUFSIZE) {
327 unsigned int npages = 1 + ((args.len - 1) >> PAGE_SHIFT);
328
329 status = -ENOMEM;
330 do {
331 args.pages[args.npages] = alloc_page(GFP_KERNEL);
332 if (args.pages[args.npages] == NULL)
333 goto out_freepages;
334 args.npages++;
335 } while (args.npages < npages);
322 } 336 }
323 337
324 dprintk("NFS call setacl\n"); 338 dprintk("NFS call setacl\n");
@@ -329,10 +343,6 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
329 nfs_zap_acl_cache(inode); 343 nfs_zap_acl_cache(inode);
330 dprintk("NFS reply setacl: %d\n", status); 344 dprintk("NFS reply setacl: %d\n", status);
331 345
332 /* pages may have been allocated at the xdr layer. */
333 for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
334 __free_page(args.pages[count]);
335
336 switch (status) { 346 switch (status) {
337 case 0: 347 case 0:
338 status = nfs_refresh_inode(inode, &fattr); 348 status = nfs_refresh_inode(inode, &fattr);
@@ -346,6 +356,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
346 case -ENOTSUPP: 356 case -ENOTSUPP:
347 status = -EOPNOTSUPP; 357 status = -EOPNOTSUPP;
348 } 358 }
359out_freepages:
360 while (args.npages != 0) {
361 args.npages--;
362 __free_page(args.pages[args.npages]);
363 }
349out: 364out:
350 return status; 365 return status;
351} 366}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 11cdddec1432..6cdeacffde46 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -82,8 +82,10 @@
82#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) 82#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2)
83 83
84#define ACL3_getaclargs_sz (NFS3_fh_sz+1) 84#define ACL3_getaclargs_sz (NFS3_fh_sz+1)
85#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3)) 85#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
86#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3)) 86 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
87#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
88 XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
87#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) 89#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
88 90
89/* 91/*
@@ -703,28 +705,18 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
703 struct nfs3_setaclargs *args) 705 struct nfs3_setaclargs *args)
704{ 706{
705 struct xdr_buf *buf = &req->rq_snd_buf; 707 struct xdr_buf *buf = &req->rq_snd_buf;
706 unsigned int base, len_in_head, len = nfsacl_size( 708 unsigned int base;
707 (args->mask & NFS_ACL) ? args->acl_access : NULL, 709 int err;
708 (args->mask & NFS_DFACL) ? args->acl_default : NULL);
709 int count, err;
710 710
711 p = xdr_encode_fhandle(p, NFS_FH(args->inode)); 711 p = xdr_encode_fhandle(p, NFS_FH(args->inode));
712 *p++ = htonl(args->mask); 712 *p++ = htonl(args->mask);
713 base = (char *)p - (char *)buf->head->iov_base; 713 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
714 /* put as much of the acls into head as possible. */ 714 base = req->rq_slen;
715 len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); 715
716 len -= len_in_head; 716 if (args->npages != 0)
717 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2)); 717 xdr_encode_pages(buf, args->pages, 0, args->len);
718 718 else
719 for (count = 0; (count << PAGE_SHIFT) < len; count++) { 719 req->rq_slen += args->len;
720 args->pages[count] = alloc_page(GFP_KERNEL);
721 if (!args->pages[count]) {
722 while (count)
723 __free_page(args->pages[--count]);
724 return -ENOMEM;
725 }
726 }
727 xdr_encode_pages(buf, args->pages, 0, len);
728 720
729 err = nfsacl_encode(buf, base, args->inode, 721 err = nfsacl_encode(buf, base, args->inode,
730 (args->mask & NFS_ACL) ? 722 (args->mask & NFS_ACL) ?
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 30befc39b3c6..2a2a0a7143ad 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -21,7 +21,9 @@
21#define NFSDBG_FACILITY NFSDBG_VFS 21#define NFSDBG_FACILITY NFSDBG_VFS
22 22
23/* 23/*
24 * Check if fs_root is valid 24 * Convert the NFSv4 pathname components into a standard posix path.
25 *
26 * Note that the resulting string will be placed at the end of the buffer
25 */ 27 */
26static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, 28static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
27 char *buffer, ssize_t buflen) 29 char *buffer, ssize_t buflen)
@@ -99,21 +101,20 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
99{ 101{
100 struct vfsmount *mnt = ERR_PTR(-ENOENT); 102 struct vfsmount *mnt = ERR_PTR(-ENOENT);
101 char *mnt_path; 103 char *mnt_path;
102 int page2len; 104 unsigned int maxbuflen;
103 unsigned int s; 105 unsigned int s;
104 106
105 mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); 107 mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
106 if (IS_ERR(mnt_path)) 108 if (IS_ERR(mnt_path))
107 return mnt; 109 return mnt;
108 mountdata->mnt_path = mnt_path; 110 mountdata->mnt_path = mnt_path;
109 page2 += strlen(mnt_path) + 1; 111 maxbuflen = mnt_path - 1 - page2;
110 page2len = PAGE_SIZE - strlen(mnt_path) - 1;
111 112
112 for (s = 0; s < location->nservers; s++) { 113 for (s = 0; s < location->nservers; s++) {
113 const struct nfs4_string *buf = &location->servers[s]; 114 const struct nfs4_string *buf = &location->servers[s];
114 struct sockaddr_storage addr; 115 struct sockaddr_storage addr;
115 116
116 if (buf->len <= 0 || buf->len >= PAGE_SIZE) 117 if (buf->len <= 0 || buf->len >= maxbuflen)
117 continue; 118 continue;
118 119
119 mountdata->addr = (struct sockaddr *)&addr; 120 mountdata->addr = (struct sockaddr *)&addr;
@@ -126,8 +127,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
126 continue; 127 continue;
127 nfs_set_port(mountdata->addr, NFS_PORT); 128 nfs_set_port(mountdata->addr, NFS_PORT);
128 129
129 strncpy(page2, buf->data, page2len); 130 memcpy(page2, buf->data, buf->len);
130 page2[page2len] = '\0'; 131 page2[buf->len] = '\0';
131 mountdata->hostname = page2; 132 mountdata->hostname = page2;
132 133
133 snprintf(page, PAGE_SIZE, "%s:%s", 134 snprintf(page, PAGE_SIZE, "%s:%s",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f65953be39c0..9250067943d8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2596,6 +2596,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
2596 [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop, 2596 [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
2597 [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop, 2597 [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
2598 [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open, 2598 [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
2599 [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
2599 [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm, 2600 [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
2600 [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade, 2601 [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
2601 [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop, 2602 [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 3a9e5deed74d..19e3a96aa02c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -176,7 +176,8 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
176 176
177 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); 177 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
178 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && 178 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
179 (OCFS2_I(inode)->ip_clusters != rec->e_cpos), 179 (OCFS2_I(inode)->ip_clusters !=
180 le32_to_cpu(rec->e_cpos)),
180 "Device %s, asking for sparse allocation: inode %llu, " 181 "Device %s, asking for sparse allocation: inode %llu, "
181 "cpos %u, clusters %u\n", 182 "cpos %u, clusters %u\n",
182 osb->dev_str, 183 osb->dev_str,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a067a6cffb01..8e1709a679b7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
227 size = i_size_read(inode); 227 size = i_size_read(inode);
228 228
229 if (size > PAGE_CACHE_SIZE || 229 if (size > PAGE_CACHE_SIZE ||
230 size > ocfs2_max_inline_data(inode->i_sb)) { 230 size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
231 ocfs2_error(inode->i_sb, 231 ocfs2_error(inode->i_sb,
232 "Inode %llu has with inline data has bad size: %Lu", 232 "Inode %llu has with inline data has bad size: %Lu",
233 (unsigned long long)OCFS2_I(inode)->ip_blkno, 233 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1555,6 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
1555 int ret, written = 0; 1555 int ret, written = 0;
1556 loff_t end = pos + len; 1556 loff_t end = pos + len;
1557 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1557 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1558 struct ocfs2_dinode *di = NULL;
1558 1559
1559 mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n", 1560 mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
1560 (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos, 1561 (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
@@ -1587,7 +1588,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
1587 /* 1588 /*
1588 * Check whether the write can fit. 1589 * Check whether the write can fit.
1589 */ 1590 */
1590 if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb)) 1591 di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1592 if (mmap_page ||
1593 end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
1591 return 0; 1594 return 0;
1592 1595
1593do_inline_write: 1596do_inline_write:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 084aba86c3b2..4b11762f249e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -532,7 +532,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
532 532
533 fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL); 533 fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
534 534
535 fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb)); 535 fe->id2.i_data.id_count = cpu_to_le16(
536 ocfs2_max_inline_data_with_xattr(osb->sb, fe));
536 } else { 537 } else {
537 fel = &fe->id2.i_list; 538 fel = &fe->id2.i_list;
538 fel->l_tree_depth = 0; 539 fel->l_tree_depth = 0;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c7ae45aaa36c..2332ef740f4f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1070,12 +1070,6 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
1070 offsetof(struct ocfs2_dinode, id2.i_symlink); 1070 offsetof(struct ocfs2_dinode, id2.i_symlink);
1071} 1071}
1072 1072
1073static inline int ocfs2_max_inline_data(struct super_block *sb)
1074{
1075 return sb->s_blocksize -
1076 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
1077}
1078
1079static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb, 1073static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
1080 struct ocfs2_dinode *di) 1074 struct ocfs2_dinode *di)
1081{ 1075{
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4ddd788add67..2563df89fc2a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -547,8 +547,12 @@ int ocfs2_calc_xattr_init(struct inode *dir,
547 * when blocksize = 512, may reserve one more cluser for 547 * when blocksize = 512, may reserve one more cluser for
548 * xattr bucket, otherwise reserve one metadata block 548 * xattr bucket, otherwise reserve one metadata block
549 * for them is ok. 549 * for them is ok.
550 * If this is a new directory with inline data,
551 * we choose to reserve the entire inline area for
552 * directory contents and force an external xattr block.
550 */ 553 */
551 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || 554 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
555 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
552 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { 556 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
553 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); 557 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
554 if (ret) { 558 if (ret) {
@@ -4791,19 +4795,33 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4791 char *val, 4795 char *val,
4792 int value_len) 4796 int value_len)
4793{ 4797{
4794 int offset; 4798 int ret, offset, block_off;
4795 struct ocfs2_xattr_value_root *xv; 4799 struct ocfs2_xattr_value_root *xv;
4796 struct ocfs2_xattr_entry *xe = xs->here; 4800 struct ocfs2_xattr_entry *xe = xs->here;
4801 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4802 void *base;
4797 4803
4798 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); 4804 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4799 4805
4800 offset = le16_to_cpu(xe->xe_name_offset) + 4806 ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
4801 OCFS2_XATTR_SIZE(xe->xe_name_len); 4807 xe - xh->xh_entries,
4808 &block_off,
4809 &offset);
4810 if (ret) {
4811 mlog_errno(ret);
4812 goto out;
4813 }
4802 4814
4803 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); 4815 base = bucket_block(xs->bucket, block_off);
4816 xv = (struct ocfs2_xattr_value_root *)(base + offset +
4817 OCFS2_XATTR_SIZE(xe->xe_name_len));
4804 4818
4805 return __ocfs2_xattr_set_value_outside(inode, handle, 4819 ret = __ocfs2_xattr_set_value_outside(inode, handle,
4806 xv, val, value_len); 4820 xv, val, value_len);
4821 if (ret)
4822 mlog_errno(ret);
4823out:
4824 return ret;
4807} 4825}
4808 4826
4809static int ocfs2_rm_xattr_cluster(struct inode *inode, 4827static int ocfs2_rm_xattr_cluster(struct inode *inode,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f4..38e337d51ced 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
400 pdev->devt = devt; 400 pdev->devt = devt;
401 401
402 /* delay uevent until 'holders' subdir is created */ 402 /* delay uevent until 'holders' subdir is created */
403 pdev->uevent_suppress = 1; 403 dev_set_uevent_suppress(pdev, 1);
404 err = device_add(pdev); 404 err = device_add(pdev);
405 if (err) 405 if (err)
406 goto out_put; 406 goto out_put;
@@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
410 if (!p->holder_dir) 410 if (!p->holder_dir)
411 goto out_del; 411 goto out_del;
412 412
413 pdev->uevent_suppress = 0; 413 dev_set_uevent_suppress(pdev, 0);
414 if (flags & ADDPART_FLAG_WHOLEDISK) { 414 if (flags & ADDPART_FLAG_WHOLEDISK) {
415 err = device_create_file(pdev, &dev_attr_whole_disk); 415 err = device_create_file(pdev, &dev_attr_whole_disk);
416 if (err) 416 if (err)
@@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
422 rcu_assign_pointer(ptbl->part[partno], p); 422 rcu_assign_pointer(ptbl->part[partno], p);
423 423
424 /* suppress uevent if the disk supresses it */ 424 /* suppress uevent if the disk supresses it */
425 if (!ddev->uevent_suppress) 425 if (!dev_get_uevent_suppress(pdev))
426 kobject_uevent(&pdev->kobj, KOBJ_ADD); 426 kobject_uevent(&pdev->kobj, KOBJ_ADD);
427 427
428 return p; 428 return p;
@@ -455,7 +455,7 @@ void register_disk(struct gendisk *disk)
455 dev_set_name(ddev, disk->disk_name); 455 dev_set_name(ddev, disk->disk_name);
456 456
457 /* delay uevents, until we scanned partition table */ 457 /* delay uevents, until we scanned partition table */
458 ddev->uevent_suppress = 1; 458 dev_set_uevent_suppress(ddev, 1);
459 459
460 if (device_add(ddev)) 460 if (device_add(ddev))
461 return; 461 return;
@@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk)
490 490
491exit: 491exit:
492 /* announce disk after possible partitions are created */ 492 /* announce disk after possible partitions are created */
493 ddev->uevent_suppress = 0; 493 dev_set_uevent_suppress(ddev, 0);
494 kobject_uevent(&ddev->kobj, KOBJ_ADD); 494 kobject_uevent(&ddev->kobj, KOBJ_ADD);
495 495
496 /* announce possible partitions */ 496 /* announce possible partitions */
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a48ba5179d5..14f502b89cf5 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -699,12 +699,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
699 int retval; 699 int retval;
700 700
701 mutex_lock(&inode->i_mutex); 701 mutex_lock(&inode->i_mutex);
702
703 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); 702 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
704 703 if (retval >= 0) {
705 if (retval >= 0)
706 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); 704 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
707 705 if (retval < 0) /* this can happen only if on == T */
706 fasync_helper(-1, filp, 0, &pipe->fasync_readers);
707 }
708 mutex_unlock(&inode->i_mutex); 708 mutex_unlock(&inode->i_mutex);
709 709
710 if (retval < 0) 710 if (retval < 0)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0c9de19a1633..beaa0ce3b82e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3066,7 +3066,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
3066 int retval = -ENOENT; 3066 int retval = -ENOENT;
3067 ino_t ino; 3067 ino_t ino;
3068 int tid; 3068 int tid;
3069 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
3070 struct pid_namespace *ns; 3069 struct pid_namespace *ns;
3071 3070
3072 task = get_proc_task(inode); 3071 task = get_proc_task(inode);
@@ -3083,18 +3082,18 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
3083 goto out_no_task; 3082 goto out_no_task;
3084 retval = 0; 3083 retval = 0;
3085 3084
3086 switch (pos) { 3085 switch ((unsigned long)filp->f_pos) {
3087 case 0: 3086 case 0:
3088 ino = inode->i_ino; 3087 ino = inode->i_ino;
3089 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) 3088 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
3090 goto out; 3089 goto out;
3091 pos++; 3090 filp->f_pos++;
3092 /* fall through */ 3091 /* fall through */
3093 case 1: 3092 case 1:
3094 ino = parent_ino(dentry); 3093 ino = parent_ino(dentry);
3095 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) 3094 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
3096 goto out; 3095 goto out;
3097 pos++; 3096 filp->f_pos++;
3098 /* fall through */ 3097 /* fall through */
3099 } 3098 }
3100 3099
@@ -3104,9 +3103,9 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
3104 ns = filp->f_dentry->d_sb->s_fs_info; 3103 ns = filp->f_dentry->d_sb->s_fs_info;
3105 tid = (int)filp->f_version; 3104 tid = (int)filp->f_version;
3106 filp->f_version = 0; 3105 filp->f_version = 0;
3107 for (task = first_tid(leader, tid, pos - 2, ns); 3106 for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
3108 task; 3107 task;
3109 task = next_tid(task), pos++) { 3108 task = next_tid(task), filp->f_pos++) {
3110 tid = task_pid_nr_ns(task, ns); 3109 tid = task_pid_nr_ns(task, ns);
3111 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { 3110 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
3112 /* returning this tgid failed, save it as the first 3111 /* returning this tgid failed, save it as the first
@@ -3117,7 +3116,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
3117 } 3116 }
3118 } 3117 }
3119out: 3118out:
3120 filp->f_pos = pos;
3121 put_task_struct(leader); 3119 put_task_struct(leader);
3122out_no_task: 3120out_no_task:
3123 return retval; 3121 return retval;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2d1345112a42..e9983837d08d 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -80,7 +80,7 @@ static const struct file_operations proc_kpagecount_operations = {
80#define KPF_RECLAIM 9 80#define KPF_RECLAIM 9
81#define KPF_BUDDY 10 81#define KPF_BUDDY 10
82 82
/*
 * Take bit number @srcpos of @flags and yield it moved to bit position
 * @dstpos (all other bits of the result are zero).  Each argument is
 * parenthesized so that compound expressions expand correctly.
 */
#define kpf_copy_bit(flags, dstpos, srcpos) \
	((((flags) >> (srcpos)) & 1) << (dstpos))
84 84
85static ssize_t kpageflags_read(struct file *file, char __user *buf, 85static ssize_t kpageflags_read(struct file *file, char __user *buf,
86 size_t count, loff_t *ppos) 86 size_t count, loff_t *ppos)
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index b9b567a28376..5d7c7ececa64 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -114,6 +114,9 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
114 if (!pagevec_add(&lru_pvec, page)) 114 if (!pagevec_add(&lru_pvec, page))
115 __pagevec_lru_add_file(&lru_pvec); 115 __pagevec_lru_add_file(&lru_pvec);
116 116
117 /* prevent the page from being discarded on memory pressure */
118 SetPageDirty(page);
119
117 unlock_page(page); 120 unlock_page(page);
118 } 121 }
119 122
@@ -126,6 +129,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
126 return -EFBIG; 129 return -EFBIG;
127 130
128 add_error: 131 add_error:
132 pagevec_lru_add_file(&lru_pvec);
129 page_cache_release(pages + loop); 133 page_cache_release(pages + loop);
130 for (loop++; loop < npages; loop++) 134 for (loop++; loop < npages; loop++)
131 __free_page(pages + loop); 135 __free_page(pages + loop);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index c837dfc2b3c6..2a7960310349 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -80,7 +80,7 @@ static struct buffer_head *get_block_length(struct super_block *sb,
80 * generated a larger block - this does occasionally happen with zlib). 80 * generated a larger block - this does occasionally happen with zlib).
81 */ 81 */
82int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, 82int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
83 int length, u64 *next_index, int srclength) 83 int length, u64 *next_index, int srclength, int pages)
84{ 84{
85 struct squashfs_sb_info *msblk = sb->s_fs_info; 85 struct squashfs_sb_info *msblk = sb->s_fs_info;
86 struct buffer_head **bh; 86 struct buffer_head **bh;
@@ -184,7 +184,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
184 offset = 0; 184 offset = 0;
185 } 185 }
186 186
187 if (msblk->stream.avail_out == 0) { 187 if (msblk->stream.avail_out == 0 && page < pages) {
188 msblk->stream.next_out = buffer[page++]; 188 msblk->stream.next_out = buffer[page++];
189 msblk->stream.avail_out = PAGE_CACHE_SIZE; 189 msblk->stream.avail_out = PAGE_CACHE_SIZE;
190 } 190 }
@@ -201,25 +201,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
201 zlib_init = 1; 201 zlib_init = 1;
202 } 202 }
203 203
204 zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH); 204 zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH);
205 205
206 if (msblk->stream.avail_in == 0 && k < b) 206 if (msblk->stream.avail_in == 0 && k < b)
207 put_bh(bh[k++]); 207 put_bh(bh[k++]);
208 } while (zlib_err == Z_OK); 208 } while (zlib_err == Z_OK);
209 209
210 if (zlib_err != Z_STREAM_END) { 210 if (zlib_err != Z_STREAM_END) {
211 ERROR("zlib_inflate returned unexpected result" 211 ERROR("zlib_inflate error, data probably corrupt\n");
212 " 0x%x, srclength %d, avail_in %d,"
213 " avail_out %d\n", zlib_err, srclength,
214 msblk->stream.avail_in,
215 msblk->stream.avail_out);
216 goto release_mutex; 212 goto release_mutex;
217 } 213 }
218 214
219 zlib_err = zlib_inflateEnd(&msblk->stream); 215 zlib_err = zlib_inflateEnd(&msblk->stream);
220 if (zlib_err != Z_OK) { 216 if (zlib_err != Z_OK) {
221 ERROR("zlib_inflateEnd returned unexpected result 0x%x," 217 ERROR("zlib_inflate error, data probably corrupt\n");
222 " srclength %d\n", zlib_err, srclength);
223 goto release_mutex; 218 goto release_mutex;
224 } 219 }
225 length = msblk->stream.total_out; 220 length = msblk->stream.total_out;
@@ -268,7 +263,8 @@ block_release:
268 put_bh(bh[k]); 263 put_bh(bh[k]);
269 264
270read_failure: 265read_failure:
271 ERROR("sb_bread failed reading block 0x%llx\n", cur_index); 266 ERROR("squashfs_read_data failed to read block 0x%llx\n",
267 (unsigned long long) index);
272 kfree(bh); 268 kfree(bh);
273 return -EIO; 269 return -EIO;
274} 270}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index f29eda16d25e..1c4739e33af6 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -119,7 +119,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
119 119
120 entry->length = squashfs_read_data(sb, entry->data, 120 entry->length = squashfs_read_data(sb, entry->data,
121 block, length, &entry->next_index, 121 block, length, &entry->next_index,
122 cache->block_size); 122 cache->block_size, cache->pages);
123 123
124 spin_lock(&cache->lock); 124 spin_lock(&cache->lock);
125 125
@@ -406,7 +406,7 @@ int squashfs_read_table(struct super_block *sb, void *buffer, u64 block,
406 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) 406 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
407 data[i] = buffer; 407 data[i] = buffer;
408 res = squashfs_read_data(sb, data, block, length | 408 res = squashfs_read_data(sb, data, block, length |
409 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length); 409 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
410 kfree(data); 410 kfree(data);
411 return res; 411 return res;
412} 412}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 7a63398bb855..9101dbde39ec 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -133,7 +133,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
133 type = le16_to_cpu(sqshb_ino->inode_type); 133 type = le16_to_cpu(sqshb_ino->inode_type);
134 switch (type) { 134 switch (type) {
135 case SQUASHFS_REG_TYPE: { 135 case SQUASHFS_REG_TYPE: {
136 unsigned int frag_offset, frag_size, frag; 136 unsigned int frag_offset, frag;
137 int frag_size;
137 u64 frag_blk; 138 u64 frag_blk;
138 struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg; 139 struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg;
139 140
@@ -175,7 +176,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
175 break; 176 break;
176 } 177 }
177 case SQUASHFS_LREG_TYPE: { 178 case SQUASHFS_LREG_TYPE: {
178 unsigned int frag_offset, frag_size, frag; 179 unsigned int frag_offset, frag;
180 int frag_size;
179 u64 frag_blk; 181 u64 frag_blk;
180 struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg; 182 struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg;
181 183
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 6b2515d027d5..0e9feb6adf7e 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -34,7 +34,7 @@ static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
34 34
35/* block.c */ 35/* block.c */
36extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, 36extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
37 int); 37 int, int);
38 38
39/* cache.c */ 39/* cache.c */
40extern struct squashfs_cache *squashfs_cache_init(char *, int, int); 40extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 071df5b5b491..681ec0d83799 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -389,7 +389,7 @@ static int __init init_squashfs_fs(void)
389 return err; 389 return err;
390 } 390 }
391 391
392 printk(KERN_INFO "squashfs: version 4.0 (2009/01/03) " 392 printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
393 "Phillip Lougher\n"); 393 "Phillip Lougher\n");
394 394
395 return 0; 395 return 0;
diff --git a/fs/super.c b/fs/super.c
index 8349ed6b1412..6ce501447ada 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -371,8 +371,10 @@ retry:
371 continue; 371 continue;
372 if (!grab_super(old)) 372 if (!grab_super(old))
373 goto retry; 373 goto retry;
374 if (s) 374 if (s) {
375 up_write(&s->s_umount);
375 destroy_super(s); 376 destroy_super(s);
377 }
376 return old; 378 return old;
377 } 379 }
378 } 380 }
@@ -387,6 +389,7 @@ retry:
387 err = set(s, data); 389 err = set(s, data);
388 if (err) { 390 if (err) {
389 spin_unlock(&sb_lock); 391 spin_unlock(&sb_lock);
392 up_write(&s->s_umount);
390 destroy_super(s); 393 destroy_super(s);
391 return ERR_PTR(err); 394 return ERR_PTR(err);
392 } 395 }
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index f2c478c3424e..07703d3ff4a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -21,15 +21,28 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/mm.h>
24 25
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26 27
27#include "sysfs.h" 28#include "sysfs.h"
28 29
30/*
31 * There's one bin_buffer for each open file.
32 *
33 * filp->private_data points to bin_buffer and
34 * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s
35 * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
36 */
37static DEFINE_MUTEX(sysfs_bin_lock);
38
29struct bin_buffer { 39struct bin_buffer {
30 struct mutex mutex; 40 struct mutex mutex;
31 void *buffer; 41 void *buffer;
32 int mmapped; 42 int mmapped;
43 struct vm_operations_struct *vm_ops;
44 struct file *file;
45 struct hlist_node list;
33}; 46};
34 47
35static int 48static int
@@ -168,6 +181,175 @@ out_free:
168 return count; 181 return count;
169} 182}
170 183
184static void bin_vma_open(struct vm_area_struct *vma)
185{
186 struct file *file = vma->vm_file;
187 struct bin_buffer *bb = file->private_data;
188 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
189
190 if (!bb->vm_ops || !bb->vm_ops->open)
191 return;
192
193 if (!sysfs_get_active_two(attr_sd))
194 return;
195
196 bb->vm_ops->open(vma);
197
198 sysfs_put_active_two(attr_sd);
199}
200
201static void bin_vma_close(struct vm_area_struct *vma)
202{
203 struct file *file = vma->vm_file;
204 struct bin_buffer *bb = file->private_data;
205 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
206
207 if (!bb->vm_ops || !bb->vm_ops->close)
208 return;
209
210 if (!sysfs_get_active_two(attr_sd))
211 return;
212
213 bb->vm_ops->close(vma);
214
215 sysfs_put_active_two(attr_sd);
216}
217
218static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
219{
220 struct file *file = vma->vm_file;
221 struct bin_buffer *bb = file->private_data;
222 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
223 int ret;
224
225 if (!bb->vm_ops || !bb->vm_ops->fault)
226 return VM_FAULT_SIGBUS;
227
228 if (!sysfs_get_active_two(attr_sd))
229 return VM_FAULT_SIGBUS;
230
231 ret = bb->vm_ops->fault(vma, vmf);
232
233 sysfs_put_active_two(attr_sd);
234 return ret;
235}
236
237static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
238{
239 struct file *file = vma->vm_file;
240 struct bin_buffer *bb = file->private_data;
241 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
242 int ret;
243
244 if (!bb->vm_ops)
245 return -EINVAL;
246
247 if (!bb->vm_ops->page_mkwrite)
248 return 0;
249
250 if (!sysfs_get_active_two(attr_sd))
251 return -EINVAL;
252
253 ret = bb->vm_ops->page_mkwrite(vma, page);
254
255 sysfs_put_active_two(attr_sd);
256 return ret;
257}
258
259static int bin_access(struct vm_area_struct *vma, unsigned long addr,
260 void *buf, int len, int write)
261{
262 struct file *file = vma->vm_file;
263 struct bin_buffer *bb = file->private_data;
264 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
265 int ret;
266
267 if (!bb->vm_ops || !bb->vm_ops->access)
268 return -EINVAL;
269
270 if (!sysfs_get_active_two(attr_sd))
271 return -EINVAL;
272
273 ret = bb->vm_ops->access(vma, addr, buf, len, write);
274
275 sysfs_put_active_two(attr_sd);
276 return ret;
277}
278
279#ifdef CONFIG_NUMA
280static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
281{
282 struct file *file = vma->vm_file;
283 struct bin_buffer *bb = file->private_data;
284 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
285 int ret;
286
287 if (!bb->vm_ops || !bb->vm_ops->set_policy)
288 return 0;
289
290 if (!sysfs_get_active_two(attr_sd))
291 return -EINVAL;
292
293 ret = bb->vm_ops->set_policy(vma, new);
294
295 sysfs_put_active_two(attr_sd);
296 return ret;
297}
298
299static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
300 unsigned long addr)
301{
302 struct file *file = vma->vm_file;
303 struct bin_buffer *bb = file->private_data;
304 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
305 struct mempolicy *pol;
306
307 if (!bb->vm_ops || !bb->vm_ops->get_policy)
308 return vma->vm_policy;
309
310 if (!sysfs_get_active_two(attr_sd))
311 return vma->vm_policy;
312
313 pol = bb->vm_ops->get_policy(vma, addr);
314
315 sysfs_put_active_two(attr_sd);
316 return pol;
317}
318
319static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
320 const nodemask_t *to, unsigned long flags)
321{
322 struct file *file = vma->vm_file;
323 struct bin_buffer *bb = file->private_data;
324 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
325 int ret;
326
327 if (!bb->vm_ops || !bb->vm_ops->migrate)
328 return 0;
329
330 if (!sysfs_get_active_two(attr_sd))
331 return 0;
332
333 ret = bb->vm_ops->migrate(vma, from, to, flags);
334
335 sysfs_put_active_two(attr_sd);
336 return ret;
337}
338#endif
339
340static struct vm_operations_struct bin_vm_ops = {
341 .open = bin_vma_open,
342 .close = bin_vma_close,
343 .fault = bin_fault,
344 .page_mkwrite = bin_page_mkwrite,
345 .access = bin_access,
346#ifdef CONFIG_NUMA
347 .set_policy = bin_set_policy,
348 .get_policy = bin_get_policy,
349 .migrate = bin_migrate,
350#endif
351};
352
171static int mmap(struct file *file, struct vm_area_struct *vma) 353static int mmap(struct file *file, struct vm_area_struct *vma)
172{ 354{
173 struct bin_buffer *bb = file->private_data; 355 struct bin_buffer *bb = file->private_data;
@@ -179,18 +361,37 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
179 mutex_lock(&bb->mutex); 361 mutex_lock(&bb->mutex);
180 362
181 /* need attr_sd for attr, its parent for kobj */ 363 /* need attr_sd for attr, its parent for kobj */
364 rc = -ENODEV;
182 if (!sysfs_get_active_two(attr_sd)) 365 if (!sysfs_get_active_two(attr_sd))
183 return -ENODEV; 366 goto out_unlock;
184 367
185 rc = -EINVAL; 368 rc = -EINVAL;
186 if (attr->mmap) 369 if (!attr->mmap)
187 rc = attr->mmap(kobj, attr, vma); 370 goto out_put;
371
372 rc = attr->mmap(kobj, attr, vma);
373 if (rc)
374 goto out_put;
188 375
189 if (rc == 0 && !bb->mmapped) 376 /*
190 bb->mmapped = 1; 377 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
191 else 378 * to satisfy versions of X which crash if the mmap fails: that
192 sysfs_put_active_two(attr_sd); 379 * substitutes a new vm_file, and we don't then want bin_vm_ops.
380 */
381 if (vma->vm_file != file)
382 goto out_put;
193 383
384 rc = -EINVAL;
385 if (bb->mmapped && bb->vm_ops != vma->vm_ops)
386 goto out_put;
387
388 rc = 0;
389 bb->mmapped = 1;
390 bb->vm_ops = vma->vm_ops;
391 vma->vm_ops = &bin_vm_ops;
392out_put:
393 sysfs_put_active_two(attr_sd);
394out_unlock:
194 mutex_unlock(&bb->mutex); 395 mutex_unlock(&bb->mutex);
195 396
196 return rc; 397 return rc;
@@ -223,8 +424,13 @@ static int open(struct inode * inode, struct file * file)
223 goto err_out; 424 goto err_out;
224 425
225 mutex_init(&bb->mutex); 426 mutex_init(&bb->mutex);
427 bb->file = file;
226 file->private_data = bb; 428 file->private_data = bb;
227 429
430 mutex_lock(&sysfs_bin_lock);
431 hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
432 mutex_unlock(&sysfs_bin_lock);
433
228 /* open succeeded, put active references */ 434 /* open succeeded, put active references */
229 sysfs_put_active_two(attr_sd); 435 sysfs_put_active_two(attr_sd);
230 return 0; 436 return 0;
@@ -237,11 +443,12 @@ static int open(struct inode * inode, struct file * file)
237 443
238static int release(struct inode * inode, struct file * file) 444static int release(struct inode * inode, struct file * file)
239{ 445{
240 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
241 struct bin_buffer *bb = file->private_data; 446 struct bin_buffer *bb = file->private_data;
242 447
243 if (bb->mmapped) 448 mutex_lock(&sysfs_bin_lock);
244 sysfs_put_active_two(attr_sd); 449 hlist_del(&bb->list);
450 mutex_unlock(&sysfs_bin_lock);
451
245 kfree(bb->buffer); 452 kfree(bb->buffer);
246 kfree(bb); 453 kfree(bb);
247 return 0; 454 return 0;
@@ -256,6 +463,26 @@ const struct file_operations bin_fops = {
256 .release = release, 463 .release = release,
257}; 464};
258 465
466
467void unmap_bin_file(struct sysfs_dirent *attr_sd)
468{
469 struct bin_buffer *bb;
470 struct hlist_node *tmp;
471
472 if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
473 return;
474
475 mutex_lock(&sysfs_bin_lock);
476
477 hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
478 struct inode *inode = bb->file->f_path.dentry->d_inode;
479
480 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
481 }
482
483 mutex_unlock(&sysfs_bin_lock);
484}
485
259/** 486/**
260 * sysfs_create_bin_file - create binary file for object. 487 * sysfs_create_bin_file - create binary file for object.
261 * @kobj: object. 488 * @kobj: object.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 82d3b79d0e08..66aeb4fff0c3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -434,6 +434,26 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
434} 434}
435 435
436/** 436/**
437 * sysfs_pathname - return full path to sysfs dirent
438 * @sd: sysfs_dirent whose path we want
439 * @path: caller allocated buffer
440 *
441 * Gives the name "/" to the sysfs_root entry; any path returned
442 * is relative to wherever sysfs is mounted.
443 *
444 * XXX: does no error checking on @path size
445 */
446static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
447{
448 if (sd->s_parent) {
449 sysfs_pathname(sd->s_parent, path);
450 strcat(path, "/");
451 }
452 strcat(path, sd->s_name);
453 return path;
454}
455
456/**
437 * sysfs_add_one - add sysfs_dirent to parent 457 * sysfs_add_one - add sysfs_dirent to parent
438 * @acxt: addrm context to use 458 * @acxt: addrm context to use
439 * @sd: sysfs_dirent to be added 459 * @sd: sysfs_dirent to be added
@@ -458,8 +478,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
458 int ret; 478 int ret;
459 479
460 ret = __sysfs_add_one(acxt, sd); 480 ret = __sysfs_add_one(acxt, sd);
461 WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' " 481 if (ret == -EEXIST) {
462 "can not be created\n", sd->s_name); 482 char *path = kzalloc(PATH_MAX, GFP_KERNEL);
483 WARN(1, KERN_WARNING
484 "sysfs: cannot create duplicate filename '%s'\n",
485 (path == NULL) ? sd->s_name :
486 strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
487 sd->s_name));
488 kfree(path);
489 }
490
463 return ret; 491 return ret;
464} 492}
465 493
@@ -581,6 +609,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
581 609
582 sysfs_drop_dentry(sd); 610 sysfs_drop_dentry(sd);
583 sysfs_deactivate(sd); 611 sysfs_deactivate(sd);
612 unmap_bin_file(sd);
584 sysfs_put(sd); 613 sysfs_put(sd);
585 } 614 }
586} 615}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1f4a3f877262..289c43a47263 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -659,13 +659,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
659EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 659EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
660 660
661struct sysfs_schedule_callback_struct { 661struct sysfs_schedule_callback_struct {
662 struct kobject *kobj; 662 struct list_head workq_list;
663 struct kobject *kobj;
663 void (*func)(void *); 664 void (*func)(void *);
664 void *data; 665 void *data;
665 struct module *owner; 666 struct module *owner;
666 struct work_struct work; 667 struct work_struct work;
667}; 668};
668 669
670static DEFINE_MUTEX(sysfs_workq_mutex);
671static LIST_HEAD(sysfs_workq);
669static void sysfs_schedule_callback_work(struct work_struct *work) 672static void sysfs_schedule_callback_work(struct work_struct *work)
670{ 673{
671 struct sysfs_schedule_callback_struct *ss = container_of(work, 674 struct sysfs_schedule_callback_struct *ss = container_of(work,
@@ -674,6 +677,9 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
674 (ss->func)(ss->data); 677 (ss->func)(ss->data);
675 kobject_put(ss->kobj); 678 kobject_put(ss->kobj);
676 module_put(ss->owner); 679 module_put(ss->owner);
680 mutex_lock(&sysfs_workq_mutex);
681 list_del(&ss->workq_list);
682 mutex_unlock(&sysfs_workq_mutex);
677 kfree(ss); 683 kfree(ss);
678} 684}
679 685
@@ -695,15 +701,25 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
695 * until @func returns. 701 * until @func returns.
696 * 702 *
697 * Returns 0 if the request was submitted, -ENOMEM if storage could not 703 * Returns 0 if the request was submitted, -ENOMEM if storage could not
698 * be allocated, -ENODEV if a reference to @owner isn't available. 704 * be allocated, -ENODEV if a reference to @owner isn't available,
705 * -EAGAIN if a callback has already been scheduled for @kobj.
699 */ 706 */
700int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), 707int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
701 void *data, struct module *owner) 708 void *data, struct module *owner)
702{ 709{
703 struct sysfs_schedule_callback_struct *ss; 710 struct sysfs_schedule_callback_struct *ss, *tmp;
704 711
705 if (!try_module_get(owner)) 712 if (!try_module_get(owner))
706 return -ENODEV; 713 return -ENODEV;
714
715 mutex_lock(&sysfs_workq_mutex);
716 list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
717 if (ss->kobj == kobj) {
718 mutex_unlock(&sysfs_workq_mutex);
719 return -EAGAIN;
720 }
721 mutex_unlock(&sysfs_workq_mutex);
722
707 ss = kmalloc(sizeof(*ss), GFP_KERNEL); 723 ss = kmalloc(sizeof(*ss), GFP_KERNEL);
708 if (!ss) { 724 if (!ss) {
709 module_put(owner); 725 module_put(owner);
@@ -715,6 +731,10 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
715 ss->data = data; 731 ss->data = data;
716 ss->owner = owner; 732 ss->owner = owner;
717 INIT_WORK(&ss->work, sysfs_schedule_callback_work); 733 INIT_WORK(&ss->work, sysfs_schedule_callback_work);
734 INIT_LIST_HEAD(&ss->workq_list);
735 mutex_lock(&sysfs_workq_mutex);
736 list_add_tail(&ss->workq_list, &sysfs_workq);
737 mutex_unlock(&sysfs_workq_mutex);
718 schedule_work(&ss->work); 738 schedule_work(&ss->work);
719 return 0; 739 return 0;
720} 740}
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index dfa3d94cfc74..555f0ff988df 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -147,6 +147,7 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
147{ 147{
148 struct bin_attribute *bin_attr; 148 struct bin_attribute *bin_attr;
149 149
150 inode->i_private = sysfs_get(sd);
150 inode->i_mapping->a_ops = &sysfs_aops; 151 inode->i_mapping->a_ops = &sysfs_aops;
151 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 152 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
152 inode->i_op = &sysfs_inode_operations; 153 inode->i_op = &sysfs_inode_operations;
@@ -214,6 +215,22 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
214 return inode; 215 return inode;
215} 216}
216 217
218/*
219 * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
220 * To prevent the sysfs inode numbers from being freed prematurely we take a
221 * reference to sysfs_dirent from the sysfs inode. A
222 * super_operations.delete_inode() implementation is needed to drop that
223 * reference upon inode destruction.
224 */
225void sysfs_delete_inode(struct inode *inode)
226{
227 struct sysfs_dirent *sd = inode->i_private;
228
229 truncate_inode_pages(&inode->i_data, 0);
230 clear_inode(inode);
231 sysfs_put(sd);
232}
233
217int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) 234int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
218{ 235{
219 struct sysfs_addrm_cxt acxt; 236 struct sysfs_addrm_cxt acxt;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index ab343e371d64..49749955ccaf 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -17,11 +17,10 @@
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/magic.h>
20 21
21#include "sysfs.h" 22#include "sysfs.h"
22 23
23/* Random magic number */
24#define SYSFS_MAGIC 0x62656572
25 24
26static struct vfsmount *sysfs_mount; 25static struct vfsmount *sysfs_mount;
27struct super_block * sysfs_sb = NULL; 26struct super_block * sysfs_sb = NULL;
@@ -30,6 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
30static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
31 .statfs = simple_statfs, 30 .statfs = simple_statfs,
32 .drop_inode = generic_delete_inode, 31 .drop_inode = generic_delete_inode,
32 .delete_inode = sysfs_delete_inode,
33}; 33};
34 34
35struct sysfs_dirent sysfs_root = { 35struct sysfs_dirent sysfs_root = {
@@ -53,7 +53,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
53 sysfs_sb = sb; 53 sysfs_sb = sb;
54 54
55 /* get root inode, initialize and unlock it */ 55 /* get root inode, initialize and unlock it */
56 mutex_lock(&sysfs_mutex);
56 inode = sysfs_get_inode(&sysfs_root); 57 inode = sysfs_get_inode(&sysfs_root);
58 mutex_unlock(&sysfs_mutex);
57 if (!inode) { 59 if (!inode) {
58 pr_debug("sysfs: could not get root inode\n"); 60 pr_debug("sysfs: could not get root inode\n");
59 return -ENOMEM; 61 return -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 93c6d6b27c4d..3fa0d98481e2 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -28,6 +28,7 @@ struct sysfs_elem_attr {
28 28
29struct sysfs_elem_bin_attr { 29struct sysfs_elem_bin_attr {
30 struct bin_attribute *bin_attr; 30 struct bin_attribute *bin_attr;
31 struct hlist_head buffers;
31}; 32};
32 33
33/* 34/*
@@ -145,6 +146,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
145 * inode.c 146 * inode.c
146 */ 147 */
147struct inode *sysfs_get_inode(struct sysfs_dirent *sd); 148struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
149void sysfs_delete_inode(struct inode *inode);
148int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 150int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
149int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 151int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
150int sysfs_inode_init(void); 152int sysfs_inode_init(void);
@@ -163,6 +165,7 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
163 * bin.c 165 * bin.c
164 */ 166 */
165extern const struct file_operations bin_fops; 167extern const struct file_operations bin_fops;
168void unmap_bin_file(struct sysfs_dirent *attr_sd);
166 169
167/* 170/*
168 * symlink.c 171 * symlink.c
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index e65212dfb60e..261a1c2f22dd 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -41,7 +41,7 @@
41 * Stefan Reinauer <stepan@home.culture.mipt.ru> 41 * Stefan Reinauer <stepan@home.culture.mipt.ru>
42 * 42 *
43 * Module usage counts added on 96/04/29 by 43 * Module usage counts added on 96/04/29 by
44 * Gertjan van Wingerde <gertjan@cs.vu.nl> 44 * Gertjan van Wingerde <gwingerde@gmail.com>
45 * 45 *
46 * Clean swab support on 19970406 by 46 * Clean swab support on 19970406 by
47 * Francois-Rene Rideau <fare@tunes.org> 47 * Francois-Rene Rideau <fare@tunes.org>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index cb329edc925b..aa1016bb9134 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -34,6 +34,12 @@
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36 36
37#include "xfs_sb.h"
38#include "xfs_inum.h"
39#include "xfs_ag.h"
40#include "xfs_dmapi.h"
41#include "xfs_mount.h"
42
37static kmem_zone_t *xfs_buf_zone; 43static kmem_zone_t *xfs_buf_zone;
38STATIC int xfsbufd(void *); 44STATIC int xfsbufd(void *);
39STATIC int xfsbufd_wakeup(int, gfp_t); 45STATIC int xfsbufd_wakeup(int, gfp_t);
@@ -1435,10 +1441,12 @@ xfs_unregister_buftarg(
1435 1441
1436void 1442void
1437xfs_free_buftarg( 1443xfs_free_buftarg(
1438 xfs_buftarg_t *btp) 1444 struct xfs_mount *mp,
1445 struct xfs_buftarg *btp)
1439{ 1446{
1440 xfs_flush_buftarg(btp, 1); 1447 xfs_flush_buftarg(btp, 1);
1441 xfs_blkdev_issue_flush(btp); 1448 if (mp->m_flags & XFS_MOUNT_BARRIER)
1449 xfs_blkdev_issue_flush(btp);
1442 xfs_free_bufhash(btp); 1450 xfs_free_bufhash(btp);
1443 iput(btp->bt_mapping->host); 1451 iput(btp->bt_mapping->host);
1444 1452
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 288ae7c4c800..9b4d666ad31f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -413,7 +413,7 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
413 * Handling of buftargs. 413 * Handling of buftargs.
414 */ 414 */
415extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 415extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
416extern void xfs_free_buftarg(xfs_buftarg_t *); 416extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
417extern void xfs_wait_buftarg(xfs_buftarg_t *); 417extern void xfs_wait_buftarg(xfs_buftarg_t *);
418extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 418extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
419extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 419extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index c71e226da7f5..32ae5028e96b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -734,15 +734,15 @@ xfs_close_devices(
734{ 734{
735 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 735 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
736 struct block_device *logdev = mp->m_logdev_targp->bt_bdev; 736 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
737 xfs_free_buftarg(mp->m_logdev_targp); 737 xfs_free_buftarg(mp, mp->m_logdev_targp);
738 xfs_blkdev_put(logdev); 738 xfs_blkdev_put(logdev);
739 } 739 }
740 if (mp->m_rtdev_targp) { 740 if (mp->m_rtdev_targp) {
741 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; 741 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
742 xfs_free_buftarg(mp->m_rtdev_targp); 742 xfs_free_buftarg(mp, mp->m_rtdev_targp);
743 xfs_blkdev_put(rtdev); 743 xfs_blkdev_put(rtdev);
744 } 744 }
745 xfs_free_buftarg(mp->m_ddev_targp); 745 xfs_free_buftarg(mp, mp->m_ddev_targp);
746} 746}
747 747
748/* 748/*
@@ -811,9 +811,9 @@ xfs_open_devices(
811 811
812 out_free_rtdev_targ: 812 out_free_rtdev_targ:
813 if (mp->m_rtdev_targp) 813 if (mp->m_rtdev_targp)
814 xfs_free_buftarg(mp->m_rtdev_targp); 814 xfs_free_buftarg(mp, mp->m_rtdev_targp);
815 out_free_ddev_targ: 815 out_free_ddev_targ:
816 xfs_free_buftarg(mp->m_ddev_targp); 816 xfs_free_buftarg(mp, mp->m_ddev_targp);
817 out_close_rtdev: 817 out_close_rtdev:
818 if (rtdev) 818 if (rtdev)
819 xfs_blkdev_put(rtdev); 819 xfs_blkdev_put(rtdev);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e2fb6210d4c5..478e587087fe 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -246,9 +246,6 @@ xfs_iget_cache_miss(
246 goto out_destroy; 246 goto out_destroy;
247 } 247 }
248 248
249 if (lock_flags)
250 xfs_ilock(ip, lock_flags);
251
252 /* 249 /*
253 * Preload the radix tree so we can insert safely under the 250 * Preload the radix tree so we can insert safely under the
254 * write spinlock. Note that we cannot sleep inside the preload 251 * write spinlock. Note that we cannot sleep inside the preload
@@ -256,7 +253,16 @@ xfs_iget_cache_miss(
256 */ 253 */
257 if (radix_tree_preload(GFP_KERNEL)) { 254 if (radix_tree_preload(GFP_KERNEL)) {
258 error = EAGAIN; 255 error = EAGAIN;
259 goto out_unlock; 256 goto out_destroy;
257 }
258
259 /*
260 * Because the inode hasn't been added to the radix-tree yet it can't
261 * be found by another thread, so we can do the non-sleeping lock here.
262 */
263 if (lock_flags) {
264 if (!xfs_ilock_nowait(ip, lock_flags))
265 BUG();
260 } 266 }
261 267
262 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); 268 mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
@@ -284,7 +290,6 @@ xfs_iget_cache_miss(
284out_preload_end: 290out_preload_end:
285 write_unlock(&pag->pag_ici_lock); 291 write_unlock(&pag->pag_ici_lock);
286 radix_tree_preload_end(); 292 radix_tree_preload_end();
287out_unlock:
288 if (lock_flags) 293 if (lock_flags)
289 xfs_iunlock(ip, lock_flags); 294 xfs_iunlock(ip, lock_flags);
290out_destroy: 295out_destroy:
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b1047de2fffd..61af610d79b3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1455,10 +1455,19 @@ xlog_recover_add_to_trans(
1455 item = item->ri_prev; 1455 item = item->ri_prev;
1456 1456
1457 if (item->ri_total == 0) { /* first region to be added */ 1457 if (item->ri_total == 0) { /* first region to be added */
1458 item->ri_total = in_f->ilf_size; 1458 if (in_f->ilf_size == 0 ||
1459 ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM); 1459 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
1460 item->ri_buf = kmem_zalloc((item->ri_total * 1460 xlog_warn(
1461 sizeof(xfs_log_iovec_t)), KM_SLEEP); 1461 "XFS: bad number of regions (%d) in inode log format",
1462 in_f->ilf_size);
1463 ASSERT(0);
1464 return XFS_ERROR(EIO);
1465 }
1466
1467 item->ri_total = in_f->ilf_size;
1468 item->ri_buf =
1469 kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
1470 KM_SLEEP);
1462 } 1471 }
1463 ASSERT(item->ri_total > item->ri_cnt); 1472 ASSERT(item->ri_total > item->ri_cnt);
1464 /* Description region is ri_buf[0] */ 1473 /* Description region is ri_buf[0] */