aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bio-integrity.c85
-rw-r--r--fs/bio.c87
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/cifs/CHANGES11
-rw-r--r--fs/cifs/Kconfig21
-rw-r--r--fs/cifs/README22
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifs_dfs_ref.c36
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifspdu.h76
-rw-r--r--fs/cifs/cifsproto.h9
-rw-r--r--fs/cifs/cifssmb.c27
-rw-r--r--fs/cifs/connect.c9
-rw-r--r--fs/cifs/dir.c6
-rw-r--r--fs/cifs/file.c199
-rw-r--r--fs/cifs/inode.c3
-rw-r--r--fs/cifs/smbfsctl.h84
-rw-r--r--fs/compat.c3
-rw-r--r--fs/compat_ioctl.c8
-rw-r--r--fs/dlm/dir.c18
-rw-r--r--fs/dlm/dlm_internal.h2
-rw-r--r--fs/dlm/lock.c60
-rw-r--r--fs/dlm/lockspace.c2
-rw-r--r--fs/dlm/lowcomms.c181
-rw-r--r--fs/dlm/user.c24
-rw-r--r--fs/eventpoll.c12
-rw-r--r--fs/exec.c13
-rw-r--r--fs/ext3/inode.c18
-rw-r--r--fs/fcntl.c33
-rw-r--r--fs/file_table.c3
-rw-r--r--fs/gfs2/Kconfig17
-rw-r--r--fs/gfs2/Makefile4
-rw-r--r--fs/gfs2/acl.c1
-rw-r--r--fs/gfs2/bmap.c1
-rw-r--r--fs/gfs2/dir.c1
-rw-r--r--fs/gfs2/eaops.c1
-rw-r--r--fs/gfs2/eattr.c1
-rw-r--r--fs/gfs2/glock.c268
-rw-r--r--fs/gfs2/glock.h127
-rw-r--r--fs/gfs2/glops.c160
-rw-r--r--fs/gfs2/glops.h1
-rw-r--r--fs/gfs2/incore.h71
-rw-r--r--fs/gfs2/inode.c13
-rw-r--r--fs/gfs2/inode.h22
-rw-r--r--fs/gfs2/lock_dlm.c241
-rw-r--r--fs/gfs2/locking.c232
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c708
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h166
-rw-r--r--fs/gfs2/locking/dlm/main.c48
-rw-r--r--fs/gfs2/locking/dlm/mount.c276
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c226
-rw-r--r--fs/gfs2/locking/dlm/thread.c68
-rw-r--r--fs/gfs2/log.c1
-rw-r--r--fs/gfs2/lops.c1
-rw-r--r--fs/gfs2/main.c13
-rw-r--r--fs/gfs2/meta_io.c22
-rw-r--r--fs/gfs2/meta_io.h1
-rw-r--r--fs/gfs2/mount.c128
-rw-r--r--fs/gfs2/mount.h17
-rw-r--r--fs/gfs2/ops_address.c5
-rw-r--r--fs/gfs2/ops_dentry.c1
-rw-r--r--fs/gfs2/ops_export.c1
-rw-r--r--fs/gfs2/ops_file.c76
-rw-r--r--fs/gfs2/ops_fstype.c156
-rw-r--r--fs/gfs2/ops_inode.c1
-rw-r--r--fs/gfs2/ops_super.c44
-rw-r--r--fs/gfs2/quota.c203
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/recovery.c28
-rw-r--r--fs/gfs2/rgrp.c189
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/super.h26
-rw-r--r--fs/gfs2/sys.c236
-rw-r--r--fs/gfs2/trans.c19
-rw-r--r--fs/gfs2/util.c11
-rw-r--r--fs/inode.c71
-rw-r--r--fs/ioctl.c18
-rw-r--r--fs/namei.c8
-rw-r--r--fs/namespace.c11
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/partitions/check.c10
-rw-r--r--fs/partitions/ibm.c101
-rw-r--r--fs/pipe.c16
-rw-r--r--fs/quota/dquot.c5
-rw-r--r--fs/super.c11
-rw-r--r--fs/sync.c14
-rw-r--r--fs/sysfs/bin.c253
-rw-r--r--fs/sysfs/dir.c33
-rw-r--r--fs/sysfs/file.c26
-rw-r--r--fs/sysfs/inode.c17
-rw-r--r--fs/sysfs/mount.c6
-rw-r--r--fs/sysfs/sysfs.h3
94 files changed, 2558 insertions, 2949 deletions
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index fe2b1aa2464e..31c46a241bac 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -26,23 +26,23 @@
26#include <linux/workqueue.h> 26#include <linux/workqueue.h>
27 27
28static struct kmem_cache *bio_integrity_slab __read_mostly; 28static struct kmem_cache *bio_integrity_slab __read_mostly;
29static mempool_t *bio_integrity_pool;
30static struct bio_set *integrity_bio_set;
29static struct workqueue_struct *kintegrityd_wq; 31static struct workqueue_struct *kintegrityd_wq;
30 32
31/** 33/**
32 * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio 34 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
33 * @bio: bio to attach integrity metadata to 35 * @bio: bio to attach integrity metadata to
34 * @gfp_mask: Memory allocation mask 36 * @gfp_mask: Memory allocation mask
35 * @nr_vecs: Number of integrity metadata scatter-gather elements 37 * @nr_vecs: Number of integrity metadata scatter-gather elements
36 * @bs: bio_set to allocate from
37 * 38 *
38 * Description: This function prepares a bio for attaching integrity 39 * Description: This function prepares a bio for attaching integrity
39 * metadata. nr_vecs specifies the maximum number of pages containing 40 * metadata. nr_vecs specifies the maximum number of pages containing
40 * integrity metadata that can be attached. 41 * integrity metadata that can be attached.
41 */ 42 */
42struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, 43struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
43 gfp_t gfp_mask, 44 gfp_t gfp_mask,
44 unsigned int nr_vecs, 45 unsigned int nr_vecs)
45 struct bio_set *bs)
46{ 46{
47 struct bio_integrity_payload *bip; 47 struct bio_integrity_payload *bip;
48 struct bio_vec *iv; 48 struct bio_vec *iv;
@@ -50,7 +50,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
50 50
51 BUG_ON(bio == NULL); 51 BUG_ON(bio == NULL);
52 52
53 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); 53 bip = mempool_alloc(bio_integrity_pool, gfp_mask);
54 if (unlikely(bip == NULL)) { 54 if (unlikely(bip == NULL)) {
55 printk(KERN_ERR "%s: could not alloc bip\n", __func__); 55 printk(KERN_ERR "%s: could not alloc bip\n", __func__);
56 return NULL; 56 return NULL;
@@ -58,10 +58,10 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
58 58
59 memset(bip, 0, sizeof(*bip)); 59 memset(bip, 0, sizeof(*bip));
60 60
61 iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs); 61 iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set);
62 if (unlikely(iv == NULL)) { 62 if (unlikely(iv == NULL)) {
63 printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); 63 printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
64 mempool_free(bip, bs->bio_integrity_pool); 64 mempool_free(bip, bio_integrity_pool);
65 return NULL; 65 return NULL;
66 } 66 }
67 67
@@ -72,35 +72,16 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
72 72
73 return bip; 73 return bip;
74} 74}
75EXPORT_SYMBOL(bio_integrity_alloc_bioset);
76
77/**
78 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
79 * @bio: bio to attach integrity metadata to
80 * @gfp_mask: Memory allocation mask
81 * @nr_vecs: Number of integrity metadata scatter-gather elements
82 *
83 * Description: This function prepares a bio for attaching integrity
84 * metadata. nr_vecs specifies the maximum number of pages containing
85 * integrity metadata that can be attached.
86 */
87struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
88 gfp_t gfp_mask,
89 unsigned int nr_vecs)
90{
91 return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
92}
93EXPORT_SYMBOL(bio_integrity_alloc); 75EXPORT_SYMBOL(bio_integrity_alloc);
94 76
95/** 77/**
96 * bio_integrity_free - Free bio integrity payload 78 * bio_integrity_free - Free bio integrity payload
97 * @bio: bio containing bip to be freed 79 * @bio: bio containing bip to be freed
98 * @bs: bio_set this bio was allocated from
99 * 80 *
100 * Description: Used to free the integrity portion of a bio. Usually 81 * Description: Used to free the integrity portion of a bio. Usually
101 * called from bio_free(). 82 * called from bio_free().
102 */ 83 */
103void bio_integrity_free(struct bio *bio, struct bio_set *bs) 84void bio_integrity_free(struct bio *bio)
104{ 85{
105 struct bio_integrity_payload *bip = bio->bi_integrity; 86 struct bio_integrity_payload *bip = bio->bi_integrity;
106 87
@@ -111,8 +92,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
111 && bip->bip_buf != NULL) 92 && bip->bip_buf != NULL)
112 kfree(bip->bip_buf); 93 kfree(bip->bip_buf);
113 94
114 bvec_free_bs(bs, bip->bip_vec, bip->bip_pool); 95 bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool);
115 mempool_free(bip, bs->bio_integrity_pool); 96 mempool_free(bip, bio_integrity_pool);
116 97
117 bio->bi_integrity = NULL; 98 bio->bi_integrity = NULL;
118} 99}
@@ -686,19 +667,17 @@ EXPORT_SYMBOL(bio_integrity_split);
686 * @bio: New bio 667 * @bio: New bio
687 * @bio_src: Original bio 668 * @bio_src: Original bio
688 * @gfp_mask: Memory allocation mask 669 * @gfp_mask: Memory allocation mask
689 * @bs: bio_set to allocate bip from
690 * 670 *
691 * Description: Called to allocate a bip when cloning a bio 671 * Description: Called to allocate a bip when cloning a bio
692 */ 672 */
693int bio_integrity_clone(struct bio *bio, struct bio *bio_src, 673int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
694 gfp_t gfp_mask, struct bio_set *bs)
695{ 674{
696 struct bio_integrity_payload *bip_src = bio_src->bi_integrity; 675 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
697 struct bio_integrity_payload *bip; 676 struct bio_integrity_payload *bip;
698 677
699 BUG_ON(bip_src == NULL); 678 BUG_ON(bip_src == NULL);
700 679
701 bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); 680 bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
702 681
703 if (bip == NULL) 682 if (bip == NULL)
704 return -EIO; 683 return -EIO;
@@ -714,37 +693,25 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
714} 693}
715EXPORT_SYMBOL(bio_integrity_clone); 694EXPORT_SYMBOL(bio_integrity_clone);
716 695
717int bioset_integrity_create(struct bio_set *bs, int pool_size) 696static int __init bio_integrity_init(void)
718{ 697{
719 bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, 698 kintegrityd_wq = create_workqueue("kintegrityd");
720 bio_integrity_slab);
721 if (!bs->bio_integrity_pool)
722 return -1;
723
724 return 0;
725}
726EXPORT_SYMBOL(bioset_integrity_create);
727 699
728void bioset_integrity_free(struct bio_set *bs) 700 if (!kintegrityd_wq)
729{ 701 panic("Failed to create kintegrityd\n");
730 if (bs->bio_integrity_pool)
731 mempool_destroy(bs->bio_integrity_pool);
732}
733EXPORT_SYMBOL(bioset_integrity_free);
734 702
735void __init bio_integrity_init_slab(void)
736{
737 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, 703 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
738 SLAB_HWCACHE_ALIGN|SLAB_PANIC); 704 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
739}
740 705
741static int __init integrity_init(void) 706 bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE,
742{ 707 bio_integrity_slab);
743 kintegrityd_wq = create_workqueue("kintegrityd"); 708 if (!bio_integrity_pool)
709 panic("bio_integrity: can't allocate bip pool\n");
744 710
745 if (!kintegrityd_wq) 711 integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0);
746 panic("Failed to create kintegrityd\n"); 712 if (!integrity_bio_set)
713 panic("bio_integrity: can't allocate bio_set\n");
747 714
748 return 0; 715 return 0;
749} 716}
750subsys_initcall(integrity_init); 717subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index d4f06327c810..a040cde7f6fd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -248,7 +248,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
248 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); 248 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
249 249
250 if (bio_integrity(bio)) 250 if (bio_integrity(bio))
251 bio_integrity_free(bio, bs); 251 bio_integrity_free(bio);
252 252
253 /* 253 /*
254 * If we have front padding, adjust the bio pointer before freeing 254 * If we have front padding, adjust the bio pointer before freeing
@@ -301,48 +301,51 @@ void bio_init(struct bio *bio)
301 **/ 301 **/
302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
303{ 303{
304 struct bio_vec *bvl = NULL;
304 struct bio *bio = NULL; 305 struct bio *bio = NULL;
305 void *uninitialized_var(p); 306 unsigned long idx = 0;
307 void *p = NULL;
306 308
307 if (bs) { 309 if (bs) {
308 p = mempool_alloc(bs->bio_pool, gfp_mask); 310 p = mempool_alloc(bs->bio_pool, gfp_mask);
309 311 if (!p)
310 if (p) 312 goto err;
311 bio = p + bs->front_pad; 313 bio = p + bs->front_pad;
312 } else 314 } else {
313 bio = kmalloc(sizeof(*bio), gfp_mask); 315 bio = kmalloc(sizeof(*bio), gfp_mask);
316 if (!bio)
317 goto err;
318 }
314 319
315 if (likely(bio)) { 320 bio_init(bio);
316 struct bio_vec *bvl = NULL; 321
317 322 if (unlikely(!nr_iovecs))
318 bio_init(bio); 323 goto out_set;
319 if (likely(nr_iovecs)) { 324
320 unsigned long uninitialized_var(idx); 325 if (nr_iovecs <= BIO_INLINE_VECS) {
321 326 bvl = bio->bi_inline_vecs;
322 if (nr_iovecs <= BIO_INLINE_VECS) { 327 nr_iovecs = BIO_INLINE_VECS;
323 idx = 0; 328 } else {
324 bvl = bio->bi_inline_vecs; 329 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
325 nr_iovecs = BIO_INLINE_VECS; 330 if (unlikely(!bvl))
326 } else { 331 goto err_free;
327 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, 332
328 bs); 333 nr_iovecs = bvec_nr_vecs(idx);
329 nr_iovecs = bvec_nr_vecs(idx);
330 }
331 if (unlikely(!bvl)) {
332 if (bs)
333 mempool_free(p, bs->bio_pool);
334 else
335 kfree(bio);
336 bio = NULL;
337 goto out;
338 }
339 bio->bi_flags |= idx << BIO_POOL_OFFSET;
340 bio->bi_max_vecs = nr_iovecs;
341 }
342 bio->bi_io_vec = bvl;
343 } 334 }
344out: 335 bio->bi_flags |= idx << BIO_POOL_OFFSET;
336 bio->bi_max_vecs = nr_iovecs;
337out_set:
338 bio->bi_io_vec = bvl;
339
345 return bio; 340 return bio;
341
342err_free:
343 if (bs)
344 mempool_free(p, bs->bio_pool);
345 else
346 kfree(bio);
347err:
348 return NULL;
346} 349}
347 350
348struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) 351struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
@@ -463,7 +466,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
463 if (bio_integrity(bio)) { 466 if (bio_integrity(bio)) {
464 int ret; 467 int ret;
465 468
466 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); 469 ret = bio_integrity_clone(b, bio, gfp_mask);
467 470
468 if (ret < 0) { 471 if (ret < 0) {
469 bio_put(b); 472 bio_put(b);
@@ -1526,7 +1529,6 @@ void bioset_free(struct bio_set *bs)
1526 if (bs->bio_pool) 1529 if (bs->bio_pool)
1527 mempool_destroy(bs->bio_pool); 1530 mempool_destroy(bs->bio_pool);
1528 1531
1529 bioset_integrity_free(bs);
1530 biovec_free_pools(bs); 1532 biovec_free_pools(bs);
1531 bio_put_slab(bs); 1533 bio_put_slab(bs);
1532 1534
@@ -1567,9 +1569,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1567 if (!bs->bio_pool) 1569 if (!bs->bio_pool)
1568 goto bad; 1570 goto bad;
1569 1571
1570 if (bioset_integrity_create(bs, pool_size))
1571 goto bad;
1572
1573 if (!biovec_create_pools(bs, pool_size)) 1572 if (!biovec_create_pools(bs, pool_size))
1574 return bs; 1573 return bs;
1575 1574
@@ -1586,6 +1585,13 @@ static void __init biovec_init_slabs(void)
1586 int size; 1585 int size;
1587 struct biovec_slab *bvs = bvec_slabs + i; 1586 struct biovec_slab *bvs = bvec_slabs + i;
1588 1587
1588#ifndef CONFIG_BLK_DEV_INTEGRITY
1589 if (bvs->nr_vecs <= BIO_INLINE_VECS) {
1590 bvs->slab = NULL;
1591 continue;
1592 }
1593#endif
1594
1589 size = bvs->nr_vecs * sizeof(struct bio_vec); 1595 size = bvs->nr_vecs * sizeof(struct bio_vec);
1590 bvs->slab = kmem_cache_create(bvs->name, size, 0, 1596 bvs->slab = kmem_cache_create(bvs->name, size, 0,
1591 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1597 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
@@ -1600,7 +1606,6 @@ static int __init init_bio(void)
1600 if (!bio_slabs) 1606 if (!bio_slabs)
1601 panic("bio: can't allocate bios\n"); 1607 panic("bio: can't allocate bios\n");
1602 1608
1603 bio_integrity_init_slab();
1604 biovec_init_slabs(); 1609 biovec_init_slabs();
1605 1610
1606 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); 1611 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3e18175248e0..6ec80c0fc869 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2385,7 +2385,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2385 unsigned long thresh = 32 * 1024 * 1024; 2385 unsigned long thresh = 32 * 1024 * 1024;
2386 tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 2386 tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
2387 2387
2388 if (current_is_pdflush() || current->flags & PF_MEMALLOC) 2388 if (current->flags & PF_MEMALLOC)
2389 return; 2389 return;
2390 2390
2391 num_dirty = count_range_bits(tree, &start, (u64)-1, 2391 num_dirty = count_range_bits(tree, &start, (u64)-1,
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 851388fafc73..65984006192c 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -6,7 +6,16 @@ the server to treat subsequent connections, especially those that
6are authenticated as guest, as reconnections, invalidating the earlier 6are authenticated as guest, as reconnections, invalidating the earlier
7user's smb session. This fix allows cifs to mount multiple times to the 7user's smb session. This fix allows cifs to mount multiple times to the
8same server with different userids without risking invalidating earlier 8same server with different userids without risking invalidating earlier
9established security contexts. 9established security contexts. fsync now sends SMB Flush operation
10to better ensure that we wait for server to write all of the data to
11server disk (not just write it over the network). Add new mount
12parameter to allow user to disable sending the (slow) SMB flush on
13fsync if desired (fsync still flushes all cached write data to the server).
14POSIX file open support added (turned off after one attempt if server
15fails to support it properly, as with Samba server versions prior to 3.3.2).
16Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
17little memory for the "nativeFileSystem" field returned by the server
18during mount).
10 19
11Version 1.56 20Version 1.56
12------------ 21------------
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 341a98965bd0..6994a0f54f02 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -118,6 +118,18 @@ config CIFS_DEBUG2
118 option can be turned off unless you are debugging 118 option can be turned off unless you are debugging
119 cifs problems. If unsure, say N. 119 cifs problems. If unsure, say N.
120 120
121config CIFS_DFS_UPCALL
122 bool "DFS feature support"
123 depends on CIFS && KEYS
124 help
125 Distributed File System (DFS) support is used to access shares
126 transparently in an enterprise name space, even if the share
127 moves to a different server. This feature also enables
128 an upcall mechanism for CIFS which contacts userspace helper
129 utilities to provide server name resolution (host names to
130 IP addresses) which is needed for implicit mounts of DFS junction
131 points. If unsure, say N.
132
121config CIFS_EXPERIMENTAL 133config CIFS_EXPERIMENTAL
122 bool "CIFS Experimental Features (EXPERIMENTAL)" 134 bool "CIFS Experimental Features (EXPERIMENTAL)"
123 depends on CIFS && EXPERIMENTAL 135 depends on CIFS && EXPERIMENTAL
@@ -131,12 +143,3 @@ config CIFS_EXPERIMENTAL
131 (which is disabled by default). See the file fs/cifs/README 143 (which is disabled by default). See the file fs/cifs/README
132 for more details. If unsure, say N. 144 for more details. If unsure, say N.
133 145
134config CIFS_DFS_UPCALL
135 bool "DFS feature support (EXPERIMENTAL)"
136 depends on CIFS_EXPERIMENTAL
137 depends on KEYS
138 help
139 Enables an upcall mechanism for CIFS which contacts userspace
140 helper utilities to provide server name resolution (host names to
141 IP addresses) which is needed for implicit mounts of DFS junction
142 points. If unsure, say N.
diff --git a/fs/cifs/README b/fs/cifs/README
index da4515e3be20..07434181623b 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -472,6 +472,19 @@ A partial list of the supported mount options follows:
472 even if the cifs server would support posix advisory locks. 472 even if the cifs server would support posix advisory locks.
473 "forcemand" is accepted as a shorter form of this mount 473 "forcemand" is accepted as a shorter form of this mount
474 option. 474 option.
475 nostrictsync If this mount option is set, when an application does an
476 fsync call then the cifs client does not send an SMB Flush
477 to the server (to force the server to write all dirty data
478 for this file immediately to disk), although cifs still sends
479 all dirty (cached) file data to the server and waits for the
480 server to respond to the write. Since SMB Flush can be
481 very slow, and some servers may be reliable enough (to risk
482 delaying slightly flushing the data to disk on the server),
483 turning on this option may be useful to improve performance for
484 applications that fsync too much, at a small risk of losing
485 data if the server crashes. If this mount option is not set, by default cifs will
486 send an SMB flush request (and wait for a response) on every
487 fsync call.
475 nodfs Disable DFS (global name space support) even if the 488 nodfs Disable DFS (global name space support) even if the
476 server claims to support it. This can help work around 489 server claims to support it. This can help work around
477 a problem with parsing of DFS paths with Samba server 490 a problem with parsing of DFS paths with Samba server
@@ -692,13 +705,14 @@ require this helper. Note that NTLMv2 security (which does not require the
692cifs.upcall helper program), instead of using Kerberos, is sufficient for 705cifs.upcall helper program), instead of using Kerberos, is sufficient for
693some use cases. 706some use cases.
694 707
695Enabling DFS support (used to access shares transparently in an MS-DFS 708DFS support allows transparent redirection to shares in an MS-DFS name space.
696global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In 709In addition, DFS support for target shares which are specified as UNC
697addition, DFS support for target shares which are specified as UNC
698names which begin with host names (rather than IP addresses) requires 710names which begin with host names (rather than IP addresses) requires
699a user space helper (such as cifs.upcall) to be present in order to 711a user space helper (such as cifs.upcall) to be present in order to
700translate host names to IP addresses, and the user space helper must also 712translate host names to IP addresses, and the user space helper must also
701be configured in the file /etc/request-key.conf 713be configured in the file /etc/request-key.conf. Samba, Windows servers and
714many NAS appliances support DFS as a way of constructing a global name
715space to ease network configuration and improve reliability.
702 716
703To use cifs Kerberos and DFS support, the Linux keyutils package should be 717To use cifs Kerberos and DFS support, the Linux keyutils package should be
704installed and something like the following lines should be added to the 718installed and something like the following lines should be added to the
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 490e34bbf27a..877e4d9a1159 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -340,6 +340,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
340 seq_printf(m, "\nWrites: %d Bytes: %lld", 340 seq_printf(m, "\nWrites: %d Bytes: %lld",
341 atomic_read(&tcon->num_writes), 341 atomic_read(&tcon->num_writes),
342 (long long)(tcon->bytes_written)); 342 (long long)(tcon->bytes_written));
343 seq_printf(m, "\nFlushes: %d",
344 atomic_read(&tcon->num_flushes));
343 seq_printf(m, "\nLocks: %d HardLinks: %d " 345 seq_printf(m, "\nLocks: %d HardLinks: %d "
344 "Symlinks: %d", 346 "Symlinks: %d",
345 atomic_read(&tcon->num_locks), 347 atomic_read(&tcon->num_locks),
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 85c0a74d034d..5fdbf8a14472 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -104,9 +104,9 @@ static char *cifs_get_share_name(const char *node_name)
104 104
105 105
106/** 106/**
107 * compose_mount_options - creates mount options for referral 107 * cifs_compose_mount_options - creates mount options for referral
108 * @sb_mountdata: parent/root DFS mount options (template) 108 * @sb_mountdata: parent/root DFS mount options (template)
109 * @dentry: point where we are going to mount 109 * @fullpath: full path in UNC format
110 * @ref: server's referral 110 * @ref: server's referral
111 * @devname: pointer for saving device name 111 * @devname: pointer for saving device name
112 * 112 *
@@ -116,8 +116,8 @@ static char *cifs_get_share_name(const char *node_name)
116 * Returns: pointer to new mount options or ERR_PTR. 116 * Returns: pointer to new mount options or ERR_PTR.
117 * Caller is responsible for freeing the returned value if it is not an error. 117 * Caller is responsible for freeing the returned value if it is not an error.
118 */ 118 */
119static char *compose_mount_options(const char *sb_mountdata, 119char *cifs_compose_mount_options(const char *sb_mountdata,
120 struct dentry *dentry, 120 const char *fullpath,
121 const struct dfs_info3_param *ref, 121 const struct dfs_info3_param *ref,
122 char **devname) 122 char **devname)
123{ 123{
@@ -128,7 +128,6 @@ static char *compose_mount_options(const char *sb_mountdata,
128 char *srvIP = NULL; 128 char *srvIP = NULL;
129 char sep = ','; 129 char sep = ',';
130 int off, noff; 130 int off, noff;
131 char *fullpath;
132 131
133 if (sb_mountdata == NULL) 132 if (sb_mountdata == NULL)
134 return ERR_PTR(-EINVAL); 133 return ERR_PTR(-EINVAL);
@@ -202,17 +201,6 @@ static char *compose_mount_options(const char *sb_mountdata,
202 goto compose_mount_options_err; 201 goto compose_mount_options_err;
203 } 202 }
204 203
205 /*
206 * this function gives us a path with a double backslash prefix. We
207 * require a single backslash for DFS. Temporarily increment fullpath
208 * to put it in the proper form and decrement before freeing it.
209 */
210 fullpath = build_path_from_dentry(dentry);
211 if (!fullpath) {
212 rc = -ENOMEM;
213 goto compose_mount_options_err;
214 }
215 ++fullpath;
216 tkn_e = strchr(tkn_e + 1, '\\'); 204 tkn_e = strchr(tkn_e + 1, '\\');
217 if (tkn_e || (strlen(fullpath) - ref->path_consumed)) { 205 if (tkn_e || (strlen(fullpath) - ref->path_consumed)) {
218 strncat(mountdata, &sep, 1); 206 strncat(mountdata, &sep, 1);
@@ -221,8 +209,6 @@ static char *compose_mount_options(const char *sb_mountdata,
221 strcat(mountdata, tkn_e + 1); 209 strcat(mountdata, tkn_e + 1);
222 strcat(mountdata, fullpath + ref->path_consumed); 210 strcat(mountdata, fullpath + ref->path_consumed);
223 } 211 }
224 --fullpath;
225 kfree(fullpath);
226 212
227 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/ 213 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/
228 /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/ 214 /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/
@@ -245,10 +231,20 @@ static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent,
245 struct vfsmount *mnt; 231 struct vfsmount *mnt;
246 char *mountdata; 232 char *mountdata;
247 char *devname = NULL; 233 char *devname = NULL;
234 char *fullpath;
248 235
249 cifs_sb = CIFS_SB(dentry->d_inode->i_sb); 236 cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
250 mountdata = compose_mount_options(cifs_sb->mountdata, 237 /*
251 dentry, ref, &devname); 238 * this function gives us a path with a double backslash prefix. We
239 * require a single backslash for DFS.
240 */
241 fullpath = build_path_from_dentry(dentry);
242 if (!fullpath)
243 return ERR_PTR(-ENOMEM);
244
245 mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
246 fullpath + 1, ref, &devname);
247 kfree(fullpath);
252 248
253 if (IS_ERR(mountdata)) 249 if (IS_ERR(mountdata))
254 return (struct vfsmount *)mountdata; 250 return (struct vfsmount *)mountdata;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index c4c306f7b06f..4797787c6a44 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -32,6 +32,7 @@
32#define CIFS_MOUNT_OVERR_GID 0x800 /* override gid returned from server */ 32#define CIFS_MOUNT_OVERR_GID 0x800 /* override gid returned from server */
33#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */ 33#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */
34#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */ 34#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */
35#define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/
35 36
36struct cifs_sb_info { 37struct cifs_sb_info {
37 struct cifsTconInfo *tcon; /* primary mount */ 38 struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e004f6db5fc8..9fbf4dff5da6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -254,6 +254,7 @@ struct cifsTconInfo {
254 atomic_t num_smbs_sent; 254 atomic_t num_smbs_sent;
255 atomic_t num_writes; 255 atomic_t num_writes;
256 atomic_t num_reads; 256 atomic_t num_reads;
257 atomic_t num_flushes;
257 atomic_t num_oplock_brks; 258 atomic_t num_oplock_brks;
258 atomic_t num_opens; 259 atomic_t num_opens;
259 atomic_t num_closes; 260 atomic_t num_closes;
@@ -298,6 +299,7 @@ struct cifsTconInfo {
298 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol 299 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
299 for this mount even if server would support */ 300 for this mount even if server would support */
300 bool local_lease:1; /* check leases (only) on local system not remote */ 301 bool local_lease:1; /* check leases (only) on local system not remote */
302 bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
301 bool need_reconnect:1; /* connection reset, tid now invalid */ 303 bool need_reconnect:1; /* connection reset, tid now invalid */
302 /* BB add field for back pointer to sb struct(s)? */ 304 /* BB add field for back pointer to sb struct(s)? */
303}; 305};
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b4e2e9f0ee3d..b370489c8da5 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifspdu.h 2 * fs/cifs/cifspdu.h
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2002,2008 4 * Copyright (c) International Business Machines Corp., 2002,2009
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -23,6 +23,7 @@
23#define _CIFSPDU_H 23#define _CIFSPDU_H
24 24
25#include <net/sock.h> 25#include <net/sock.h>
26#include "smbfsctl.h"
26 27
27#ifdef CONFIG_CIFS_WEAK_PW_HASH 28#ifdef CONFIG_CIFS_WEAK_PW_HASH
28#define LANMAN_PROT 0 29#define LANMAN_PROT 0
@@ -34,15 +35,15 @@
34#define POSIX_PROT (CIFS_PROT+1) 35#define POSIX_PROT (CIFS_PROT+1)
35#define BAD_PROT 0xFFFF 36#define BAD_PROT 0xFFFF
36 37
37/* SMB command codes */ 38/* SMB command codes:
38/* 39 * Note some commands have minimal (wct=0,bcc=0), or uninteresting, responses
39 * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
40 * (ie which include no useful data other than the SMB error code itself). 40 * (ie which include no useful data other than the SMB error code itself).
41 * Knowing this helps avoid response buffer allocations and copy in some cases 41 * This can allow us to avoid response buffer allocations and copy in some cases
42 */ 42 */
43#define SMB_COM_CREATE_DIRECTORY 0x00 /* trivial response */ 43#define SMB_COM_CREATE_DIRECTORY 0x00 /* trivial response */
44#define SMB_COM_DELETE_DIRECTORY 0x01 /* trivial response */ 44#define SMB_COM_DELETE_DIRECTORY 0x01 /* trivial response */
45#define SMB_COM_CLOSE 0x04 /* triv req/rsp, timestamp ignored */ 45#define SMB_COM_CLOSE 0x04 /* triv req/rsp, timestamp ignored */
46#define SMB_COM_FLUSH 0x05 /* triv req/rsp */
46#define SMB_COM_DELETE 0x06 /* trivial response */ 47#define SMB_COM_DELETE 0x06 /* trivial response */
47#define SMB_COM_RENAME 0x07 /* trivial response */ 48#define SMB_COM_RENAME 0x07 /* trivial response */
48#define SMB_COM_QUERY_INFORMATION 0x08 /* aka getattr */ 49#define SMB_COM_QUERY_INFORMATION 0x08 /* aka getattr */
@@ -790,6 +791,12 @@ typedef struct smb_com_close_rsp {
790 __u16 ByteCount; /* bct = 0 */ 791 __u16 ByteCount; /* bct = 0 */
791} __attribute__((packed)) CLOSE_RSP; 792} __attribute__((packed)) CLOSE_RSP;
792 793
794typedef struct smb_com_flush_req {
795 struct smb_hdr hdr; /* wct = 1 */
796 __u16 FileID;
797 __u16 ByteCount; /* 0 */
798} __attribute__((packed)) FLUSH_REQ;
799
793typedef struct smb_com_findclose_req { 800typedef struct smb_com_findclose_req {
794 struct smb_hdr hdr; /* wct = 1 */ 801 struct smb_hdr hdr; /* wct = 1 */
795 __u16 FileID; 802 __u16 FileID;
@@ -1924,19 +1931,19 @@ typedef struct smb_com_transaction2_get_dfs_refer_req {
1924#define DFS_TYPE_ROOT 0x0001 1931#define DFS_TYPE_ROOT 0x0001
1925 1932
1926/* Referral Entry Flags */ 1933/* Referral Entry Flags */
1927#define DFS_NAME_LIST_REF 0x0200 1934#define DFS_NAME_LIST_REF 0x0200 /* set for domain or DC referral responses */
1935#define DFS_TARGET_SET_BOUNDARY 0x0400 /* only valid with version 4 dfs req */
1928 1936
1929typedef struct dfs_referral_level_3 { 1937typedef struct dfs_referral_level_3 { /* version 4 is same, + one flag bit */
1930 __le16 VersionNumber; 1938 __le16 VersionNumber; /* must be 3 or 4 */
1931 __le16 Size; 1939 __le16 Size;
1932 __le16 ServerType; /* 0x0001 = root targets; 0x0000 = link targets */ 1940 __le16 ServerType; /* 0x0001 = root targets; 0x0000 = link targets */
1933 __le16 ReferralEntryFlags; /* 0x0200 bit set only for domain 1941 __le16 ReferralEntryFlags;
1934 or DC referral responce */
1935 __le32 TimeToLive; 1942 __le32 TimeToLive;
1936 __le16 DfsPathOffset; 1943 __le16 DfsPathOffset;
1937 __le16 DfsAlternatePathOffset; 1944 __le16 DfsAlternatePathOffset;
1938 __le16 NetworkAddressOffset; /* offset of the link target */ 1945 __le16 NetworkAddressOffset; /* offset of the link target */
1939 __le16 ServiceSiteGuid; 1946 __u8 ServiceSiteGuid[16]; /* MBZ, ignored */
1940} __attribute__((packed)) REFERRAL3; 1947} __attribute__((packed)) REFERRAL3;
1941 1948
1942typedef struct smb_com_transaction_get_dfs_refer_rsp { 1949typedef struct smb_com_transaction_get_dfs_refer_rsp {
@@ -1946,48 +1953,15 @@ typedef struct smb_com_transaction_get_dfs_refer_rsp {
1946 __u8 Pad; 1953 __u8 Pad;
1947 __le16 PathConsumed; 1954 __le16 PathConsumed;
1948 __le16 NumberOfReferrals; 1955 __le16 NumberOfReferrals;
1949 __le16 DFSFlags; 1956 __le32 DFSFlags;
1950 __u16 Pad2;
1951 REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */ 1957 REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */
1952 /* followed by the strings pointed to by the referral structures */ 1958 /* followed by the strings pointed to by the referral structures */
1953} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_RSP; 1959} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_RSP;
1954 1960
1955/* DFS Flags */ 1961/* DFS Flags */
1956#define DFSREF_REFERRAL_SERVER 0x0001 1962#define DFSREF_REFERRAL_SERVER 0x00000001 /* all targets are DFS roots */
1957#define DFSREF_STORAGE_SERVER 0x0002 1963#define DFSREF_STORAGE_SERVER 0x00000002 /* no further ref requests needed */
1958 1964#define DFSREF_TARGET_FAILBACK 0x00000004 /* only for DFS referral version 4 */
1959/* IOCTL information */
1960/*
1961 * List of ioctl function codes that look to be of interest to remote clients
1962 * like this one. Need to do some experimentation to make sure they all work
1963 * remotely. Some of the following, such as the encryption/compression ones
1964 * would be invoked from tools via a specialized hook into the VFS rather
1965 * than via the standard vfs entry points
1966 */
1967#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
1968#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
1969#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
1970#define FSCTL_LOCK_VOLUME 0x00090018
1971#define FSCTL_UNLOCK_VOLUME 0x0009001C
1972#define FSCTL_GET_COMPRESSION 0x0009003C
1973#define FSCTL_SET_COMPRESSION 0x0009C040
1974#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
1975#define FSCTL_FILESYS_GET_STATISTICS 0x00090090
1976#define FSCTL_SET_REPARSE_POINT 0x000900A4
1977#define FSCTL_GET_REPARSE_POINT 0x000900A8
1978#define FSCTL_DELETE_REPARSE_POINT 0x000900AC
1979#define FSCTL_SET_SPARSE 0x000900C4
1980#define FSCTL_SET_ZERO_DATA 0x000900C8
1981#define FSCTL_SET_ENCRYPTION 0x000900D7
1982#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB
1983#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF
1984#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3
1985#define FSCTL_SIS_COPYFILE 0x00090100
1986#define FSCTL_SIS_LINK_FILES 0x0009C104
1987
1988#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
1989#define IO_REPARSE_TAG_HSM 0xC0000004
1990#define IO_REPARSE_TAG_SIS 0x80000007
1991 1965
1992/* 1966/*
1993 ************************************************************************ 1967 ************************************************************************
@@ -2508,8 +2482,6 @@ struct data_blob {
2508 6) Use nanosecond timestamps throughout all time fields if 2482 6) Use nanosecond timestamps throughout all time fields if
2509 corresponding attribute flag is set 2483 corresponding attribute flag is set
2510 7) sendfile - handle based copy 2484 7) sendfile - handle based copy
2511 8) Direct i/o
2512 9) Misc fcntls?
2513 2485
2514 what about fixing 64 bit alignment 2486 what about fixing 64 bit alignment
2515 2487
@@ -2628,7 +2600,5 @@ typedef struct file_chattr_info {
2628 __le64 mode; /* list of actual attribute bits on this inode */ 2600 __le64 mode; /* list of actual attribute bits on this inode */
2629} __attribute__((packed)) FILE_CHATTR_INFO; /* ext attributes 2601} __attribute__((packed)) FILE_CHATTR_INFO; /* ext attributes
2630 (chattr, chflags) level 0x206 */ 2602 (chattr, chflags) level 0x206 */
2631 2603#endif /* POSIX */
2632#endif
2633
2634#endif /* _CIFSPDU_H */ 2604#endif /* _CIFSPDU_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 083dfc57c7a3..4167716d32f2 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -44,6 +44,9 @@ extern void _FreeXid(unsigned int);
44extern char *build_path_from_dentry(struct dentry *); 44extern char *build_path_from_dentry(struct dentry *);
45extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb); 45extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
46extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 46extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
47extern char *cifs_compose_mount_options(const char *sb_mountdata,
48 const char *fullpath, const struct dfs_info3_param *ref,
49 char **devname);
47/* extern void renew_parental_timestamps(struct dentry *direntry);*/ 50/* extern void renew_parental_timestamps(struct dentry *direntry);*/
48extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, 51extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
49 struct smb_hdr * /* input */ , 52 struct smb_hdr * /* input */ ,
@@ -92,6 +95,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
92extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); 95extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
93extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); 96extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
94 97
98extern int cifs_posix_open(char *full_path, struct inode **pinode,
99 struct super_block *sb, int mode, int oflags,
100 int *poplock, __u16 *pnetfid, int xid);
95extern void posix_fill_in_inode(struct inode *tmp_inode, 101extern void posix_fill_in_inode(struct inode *tmp_inode,
96 FILE_UNIX_BASIC_INFO *pData, int isNewInode); 102 FILE_UNIX_BASIC_INFO *pData, int isNewInode);
97extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum); 103extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum);
@@ -281,6 +287,9 @@ extern int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon,
281extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, 287extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
282 const int smb_file_id); 288 const int smb_file_id);
283 289
290extern int CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon,
291 const int smb_file_id);
292
284extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, 293extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
285 const int netfid, unsigned int count, 294 const int netfid, unsigned int count,
286 const __u64 lseek, unsigned int *nbytes, char **buf, 295 const __u64 lseek, unsigned int *nbytes, char **buf,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 939e2f76b959..bc09c998631f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1934,6 +1934,27 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1934} 1934}
1935 1935
1936int 1936int
1937CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1938{
1939 int rc = 0;
1940 FLUSH_REQ *pSMB = NULL;
1941 cFYI(1, ("In CIFSSMBFlush"));
1942
1943 rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB);
1944 if (rc)
1945 return rc;
1946
1947 pSMB->FileID = (__u16) smb_file_id;
1948 pSMB->ByteCount = 0;
1949 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
1950 cifs_stats_inc(&tcon->num_flushes);
1951 if (rc)
1952 cERROR(1, ("Send error in Flush = %d", rc));
1953
1954 return rc;
1955}
1956
1957int
1937CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, 1958CIFSSMBRename(const int xid, struct cifsTconInfo *tcon,
1938 const char *fromName, const char *toName, 1959 const char *fromName, const char *toName,
1939 const struct nls_table *nls_codepage, int remap) 1960 const struct nls_table *nls_codepage, int remap)
@@ -2356,8 +2377,10 @@ winCreateHardLinkRetry:
2356 PATH_MAX, nls_codepage, remap); 2377 PATH_MAX, nls_codepage, remap);
2357 name_len++; /* trailing null */ 2378 name_len++; /* trailing null */
2358 name_len *= 2; 2379 name_len *= 2;
2359 pSMB->OldFileName[name_len] = 0; /* pad */ 2380
2360 pSMB->OldFileName[name_len + 1] = 0x04; 2381 /* protocol specifies ASCII buffer format (0x04) for unicode */
2382 pSMB->OldFileName[name_len] = 0x04;
2383 pSMB->OldFileName[name_len + 1] = 0x00; /* pad */
2361 name_len2 = 2384 name_len2 =
2362 cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], 2385 cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2],
2363 toName, PATH_MAX, nls_codepage, remap); 2386 toName, PATH_MAX, nls_codepage, remap);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index da0f4ffa0613..0de3b5615a22 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -95,6 +95,7 @@ struct smb_vol {
95 bool local_lease:1; /* check leases only on local system, not remote */ 95 bool local_lease:1; /* check leases only on local system, not remote */
96 bool noblocksnd:1; 96 bool noblocksnd:1;
97 bool noautotune:1; 97 bool noautotune:1;
98 bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
98 unsigned int rsize; 99 unsigned int rsize;
99 unsigned int wsize; 100 unsigned int wsize;
100 unsigned int sockopt; 101 unsigned int sockopt;
@@ -1274,6 +1275,10 @@ cifs_parse_mount_options(char *options, const char *devname,
1274 vol->intr = 0; 1275 vol->intr = 0;
1275 } else if (strnicmp(data, "intr", 4) == 0) { 1276 } else if (strnicmp(data, "intr", 4) == 0) {
1276 vol->intr = 1; 1277 vol->intr = 1;
1278 } else if (strnicmp(data, "nostrictsync", 12) == 0) {
1279 vol->nostrictsync = 1;
1280 } else if (strnicmp(data, "strictsync", 10) == 0) {
1281 vol->nostrictsync = 0;
1277 } else if (strnicmp(data, "serverino", 7) == 0) { 1282 } else if (strnicmp(data, "serverino", 7) == 0) {
1278 vol->server_ino = 1; 1283 vol->server_ino = 1;
1279 } else if (strnicmp(data, "noserverino", 9) == 0) { 1284 } else if (strnicmp(data, "noserverino", 9) == 0) {
@@ -2160,6 +2165,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2160 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL; 2165 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
2161 if (pvolume_info->nobrl) 2166 if (pvolume_info->nobrl)
2162 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL; 2167 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
2168 if (pvolume_info->nostrictsync)
2169 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
2163 if (pvolume_info->mand_lock) 2170 if (pvolume_info->mand_lock)
2164 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; 2171 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
2165 if (pvolume_info->cifs_acl) 2172 if (pvolume_info->cifs_acl)
@@ -3667,7 +3674,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3667 BCC(smb_buffer_response)) { 3674 BCC(smb_buffer_response)) {
3668 kfree(tcon->nativeFileSystem); 3675 kfree(tcon->nativeFileSystem);
3669 tcon->nativeFileSystem = 3676 tcon->nativeFileSystem =
3670 kzalloc(length + 2, GFP_KERNEL); 3677 kzalloc(2*(length + 1), GFP_KERNEL);
3671 if (tcon->nativeFileSystem) 3678 if (tcon->nativeFileSystem)
3672 cifs_strfromUCS_le( 3679 cifs_strfromUCS_le(
3673 tcon->nativeFileSystem, 3680 tcon->nativeFileSystem,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 89fb72832652..f9b6f68be976 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -129,7 +129,7 @@ cifs_bp_rename_retry:
129 return full_path; 129 return full_path;
130} 130}
131 131
132static int cifs_posix_open(char *full_path, struct inode **pinode, 132int cifs_posix_open(char *full_path, struct inode **pinode,
133 struct super_block *sb, int mode, int oflags, 133 struct super_block *sb, int mode, int oflags,
134 int *poplock, __u16 *pnetfid, int xid) 134 int *poplock, __u16 *pnetfid, int xid)
135{ 135{
@@ -187,7 +187,9 @@ static int cifs_posix_open(char *full_path, struct inode **pinode,
187 if (!pinode) 187 if (!pinode)
188 goto posix_open_ret; /* caller does not need info */ 188 goto posix_open_ret; /* caller does not need info */
189 189
190 *pinode = cifs_new_inode(sb, &presp_data->UniqueId); 190 if (*pinode == NULL)
191 *pinode = cifs_new_inode(sb, &presp_data->UniqueId);
192 /* else an inode was passed in. Update its info, don't create one */
191 193
192 /* We do not need to close the file if new_inode fails since 194 /* We do not need to close the file if new_inode fails since
193 the caller will retry qpathinfo as long as inode is null */ 195 the caller will retry qpathinfo as long as inode is null */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 12bb656fbe75..81747acca4c4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -78,8 +78,36 @@ static inline int cifs_convert_flags(unsigned int flags)
78 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES | 78 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
79 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA | 79 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
80 FILE_READ_DATA); 80 FILE_READ_DATA);
81}
81 82
83static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
84{
85 fmode_t posix_flags = 0;
82 86
87 if ((flags & O_ACCMODE) == O_RDONLY)
88 posix_flags = FMODE_READ;
89 else if ((flags & O_ACCMODE) == O_WRONLY)
90 posix_flags = FMODE_WRITE;
91 else if ((flags & O_ACCMODE) == O_RDWR) {
92 /* GENERIC_ALL is too much permission to request;
93 it can cause unnecessary access denied on create */
94 /* return GENERIC_ALL; */
95 posix_flags = FMODE_READ | FMODE_WRITE;
96 }
97 /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
98 reopening a file. They had their effect on the original open */
99 if (flags & O_APPEND)
100 posix_flags |= (fmode_t)O_APPEND;
101 if (flags & O_SYNC)
102 posix_flags |= (fmode_t)O_SYNC;
103 if (flags & O_DIRECTORY)
104 posix_flags |= (fmode_t)O_DIRECTORY;
105 if (flags & O_NOFOLLOW)
106 posix_flags |= (fmode_t)O_NOFOLLOW;
107 if (flags & O_DIRECT)
108 posix_flags |= (fmode_t)O_DIRECT;
109
110 return posix_flags;
83} 111}
84 112
85static inline int cifs_get_disposition(unsigned int flags) 113static inline int cifs_get_disposition(unsigned int flags)
@@ -97,6 +125,80 @@ static inline int cifs_get_disposition(unsigned int flags)
97} 125}
98 126
99/* all arguments to this function must be checked for validity in caller */ 127/* all arguments to this function must be checked for validity in caller */
128static inline int cifs_posix_open_inode_helper(struct inode *inode,
129 struct file *file, struct cifsInodeInfo *pCifsInode,
130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
131{
132 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
133/* struct timespec temp; */ /* BB REMOVEME BB */
134
135 file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
136 if (file->private_data == NULL)
137 return -ENOMEM;
138 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
139 write_lock(&GlobalSMBSeslock);
140 list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
141
142 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
143 if (pCifsInode == NULL) {
144 write_unlock(&GlobalSMBSeslock);
145 return -EINVAL;
146 }
147
148 /* want handles we can use to read with first
149 in the list so we do not have to walk the
150 list to search for one in write_begin */
151 if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
152 list_add_tail(&pCifsFile->flist,
153 &pCifsInode->openFileList);
154 } else {
155 list_add(&pCifsFile->flist,
156 &pCifsInode->openFileList);
157 }
158
159 if (pCifsInode->clientCanCacheRead) {
160 /* we have the inode open somewhere else
161 no need to discard cache data */
162 goto psx_client_can_cache;
163 }
164
165 /* BB FIXME need to fix this check to move it earlier into posix_open
166 BB FIX following section BB FIXME */
167
168 /* if not oplocked, invalidate inode pages if mtime or file
169 size changed */
170/* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
171 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
172 (file->f_path.dentry->d_inode->i_size ==
173 (loff_t)le64_to_cpu(buf->EndOfFile))) {
174 cFYI(1, ("inode unchanged on server"));
175 } else {
176 if (file->f_path.dentry->d_inode->i_mapping) {
177 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
178 if (rc != 0)
179 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
180 }
181 cFYI(1, ("invalidating remote inode since open detected it "
182 "changed"));
183 invalidate_remote_inode(file->f_path.dentry->d_inode);
184 } */
185
186psx_client_can_cache:
187 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
188 pCifsInode->clientCanCacheAll = true;
189 pCifsInode->clientCanCacheRead = true;
190 cFYI(1, ("Exclusive Oplock granted on inode %p",
191 file->f_path.dentry->d_inode));
192 } else if ((oplock & 0xF) == OPLOCK_READ)
193 pCifsInode->clientCanCacheRead = true;
194
195 /* will have to change the unlock if we reenable the
196 filemap_fdatawrite (which does not seem necessary) */
197 write_unlock(&GlobalSMBSeslock);
198 return 0;
199}
200
201/* all arguments to this function must be checked for validity in caller */
100static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, 202static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
101 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, 203 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
102 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf, 204 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
@@ -167,7 +269,7 @@ int cifs_open(struct inode *inode, struct file *file)
167 int rc = -EACCES; 269 int rc = -EACCES;
168 int xid, oplock; 270 int xid, oplock;
169 struct cifs_sb_info *cifs_sb; 271 struct cifs_sb_info *cifs_sb;
170 struct cifsTconInfo *pTcon; 272 struct cifsTconInfo *tcon;
171 struct cifsFileInfo *pCifsFile; 273 struct cifsFileInfo *pCifsFile;
172 struct cifsInodeInfo *pCifsInode; 274 struct cifsInodeInfo *pCifsInode;
173 struct list_head *tmp; 275 struct list_head *tmp;
@@ -180,7 +282,7 @@ int cifs_open(struct inode *inode, struct file *file)
180 xid = GetXid(); 282 xid = GetXid();
181 283
182 cifs_sb = CIFS_SB(inode->i_sb); 284 cifs_sb = CIFS_SB(inode->i_sb);
183 pTcon = cifs_sb->tcon; 285 tcon = cifs_sb->tcon;
184 286
185 if (file->f_flags & O_CREAT) { 287 if (file->f_flags & O_CREAT) {
186 /* search inode for this file and fill in file->private_data */ 288 /* search inode for this file and fill in file->private_data */
@@ -220,6 +322,45 @@ int cifs_open(struct inode *inode, struct file *file)
220 322
221 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", 323 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
222 inode, file->f_flags, full_path)); 324 inode, file->f_flags, full_path));
325
326 if (oplockEnabled)
327 oplock = REQ_OPLOCK;
328 else
329 oplock = 0;
330
331 if (!tcon->broken_posix_open && tcon->unix_ext &&
332 (tcon->ses->capabilities & CAP_UNIX) &&
333 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
334 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
335 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
336 /* can not refresh inode info since size could be stale */
337 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
338 cifs_sb->mnt_file_mode /* ignored */,
339 oflags, &oplock, &netfid, xid);
340 if (rc == 0) {
341 cFYI(1, ("posix open succeeded"));
342 /* no special case handling of setting mode
343 on read only files is needed here */
344
345 cifs_posix_open_inode_helper(inode, file, pCifsInode,
346 pCifsFile, oplock, netfid);
347 goto out;
348 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
349 if (tcon->ses->serverNOS)
350 cERROR(1, ("server %s of type %s returned"
351 " unexpected error on SMB posix open"
352 ", disabling posix open support."
353 " Check if server update available.",
354 tcon->ses->serverName,
355 tcon->ses->serverNOS));
356 tcon->broken_posix_open = true;
357 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
358 (rc != -EOPNOTSUPP)) /* path not found or net err */
359 goto out;
360 /* else fallthrough to retry open the old way on network i/o
361 or DFS errors */
362 }
363
223 desiredAccess = cifs_convert_flags(file->f_flags); 364 desiredAccess = cifs_convert_flags(file->f_flags);
224 365
225/********************************************************************* 366/*********************************************************************
@@ -248,11 +389,6 @@ int cifs_open(struct inode *inode, struct file *file)
248 389
249 disposition = cifs_get_disposition(file->f_flags); 390 disposition = cifs_get_disposition(file->f_flags);
250 391
251 if (oplockEnabled)
252 oplock = REQ_OPLOCK;
253 else
254 oplock = 0;
255
256 /* BB pass O_SYNC flag through on file attributes .. BB */ 392 /* BB pass O_SYNC flag through on file attributes .. BB */
257 393
258 /* Also refresh inode by passing in file_info buf returned by SMBOpen 394 /* Also refresh inode by passing in file_info buf returned by SMBOpen
@@ -269,7 +405,7 @@ int cifs_open(struct inode *inode, struct file *file)
269 } 405 }
270 406
271 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 407 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
272 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 408 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
273 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf, 409 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
274 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags 410 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
275 & CIFS_MOUNT_MAP_SPECIAL_CHR); 411 & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -278,7 +414,7 @@ int cifs_open(struct inode *inode, struct file *file)
278 414
279 if (rc == -EIO) { 415 if (rc == -EIO) {
280 /* Old server, try legacy style OpenX */ 416 /* Old server, try legacy style OpenX */
281 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 417 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
282 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf, 418 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
283 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags 419 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
284 & CIFS_MOUNT_MAP_SPECIAL_CHR); 420 & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -295,12 +431,12 @@ int cifs_open(struct inode *inode, struct file *file)
295 } 431 }
296 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); 432 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
297 write_lock(&GlobalSMBSeslock); 433 write_lock(&GlobalSMBSeslock);
298 list_add(&pCifsFile->tlist, &pTcon->openFileList); 434 list_add(&pCifsFile->tlist, &tcon->openFileList);
299 435
300 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 436 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
301 if (pCifsInode) { 437 if (pCifsInode) {
302 rc = cifs_open_inode_helper(inode, file, pCifsInode, 438 rc = cifs_open_inode_helper(inode, file, pCifsInode,
303 pCifsFile, pTcon, 439 pCifsFile, tcon,
304 &oplock, buf, full_path, xid); 440 &oplock, buf, full_path, xid);
305 } else { 441 } else {
306 write_unlock(&GlobalSMBSeslock); 442 write_unlock(&GlobalSMBSeslock);
@@ -309,7 +445,7 @@ int cifs_open(struct inode *inode, struct file *file)
309 if (oplock & CIFS_CREATE_ACTION) { 445 if (oplock & CIFS_CREATE_ACTION) {
310 /* time to set mode which we can not set earlier due to 446 /* time to set mode which we can not set earlier due to
311 problems creating new read-only files */ 447 problems creating new read-only files */
312 if (pTcon->unix_ext) { 448 if (tcon->unix_ext) {
313 struct cifs_unix_set_info_args args = { 449 struct cifs_unix_set_info_args args = {
314 .mode = inode->i_mode, 450 .mode = inode->i_mode,
315 .uid = NO_CHANGE_64, 451 .uid = NO_CHANGE_64,
@@ -319,7 +455,7 @@ int cifs_open(struct inode *inode, struct file *file)
319 .mtime = NO_CHANGE_64, 455 .mtime = NO_CHANGE_64,
320 .device = 0, 456 .device = 0,
321 }; 457 };
322 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, 458 CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
323 cifs_sb->local_nls, 459 cifs_sb->local_nls,
324 cifs_sb->mnt_cifs_flags & 460 cifs_sb->mnt_cifs_flags &
325 CIFS_MOUNT_MAP_SPECIAL_CHR); 461 CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -349,7 +485,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
349 int rc = -EACCES; 485 int rc = -EACCES;
350 int xid, oplock; 486 int xid, oplock;
351 struct cifs_sb_info *cifs_sb; 487 struct cifs_sb_info *cifs_sb;
352 struct cifsTconInfo *pTcon; 488 struct cifsTconInfo *tcon;
353 struct cifsFileInfo *pCifsFile; 489 struct cifsFileInfo *pCifsFile;
354 struct cifsInodeInfo *pCifsInode; 490 struct cifsInodeInfo *pCifsInode;
355 struct inode *inode; 491 struct inode *inode;
@@ -387,7 +523,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
387 } 523 }
388 524
389 cifs_sb = CIFS_SB(inode->i_sb); 525 cifs_sb = CIFS_SB(inode->i_sb);
390 pTcon = cifs_sb->tcon; 526 tcon = cifs_sb->tcon;
391 527
392/* can not grab rename sem here because various ops, including 528/* can not grab rename sem here because various ops, including
393 those that already have the rename sem can end up causing writepage 529 those that already have the rename sem can end up causing writepage
@@ -404,20 +540,37 @@ reopen_error_exit:
404 540
405 cFYI(1, ("inode = 0x%p file flags 0x%x for %s", 541 cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
406 inode, file->f_flags, full_path)); 542 inode, file->f_flags, full_path));
407 desiredAccess = cifs_convert_flags(file->f_flags);
408 543
409 if (oplockEnabled) 544 if (oplockEnabled)
410 oplock = REQ_OPLOCK; 545 oplock = REQ_OPLOCK;
411 else 546 else
412 oplock = 0; 547 oplock = 0;
413 548
549 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
550 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
551 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
552 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
553 /* can not refresh inode info since size could be stale */
554 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
555 cifs_sb->mnt_file_mode /* ignored */,
556 oflags, &oplock, &netfid, xid);
557 if (rc == 0) {
558 cFYI(1, ("posix reopen succeeded"));
559 goto reopen_success;
560 }
561 /* fallthrough to retry open the old way on errors, especially
562 in the reconnect path it is important to retry hard */
563 }
564
565 desiredAccess = cifs_convert_flags(file->f_flags);
566
414 /* Can not refresh inode by passing in file_info buf to be returned 567 /* Can not refresh inode by passing in file_info buf to be returned
415 by SMBOpen and then calling get_inode_info with returned buf 568 by SMBOpen and then calling get_inode_info with returned buf
416 since file might have write behind data that needs to be flushed 569 since file might have write behind data that needs to be flushed
417 and server version of file size can be stale. If we knew for sure 570 and server version of file size can be stale. If we knew for sure
418 that inode was not dirty locally we could do this */ 571 that inode was not dirty locally we could do this */
419 572
420 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, 573 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
421 CREATE_NOT_DIR, &netfid, &oplock, NULL, 574 CREATE_NOT_DIR, &netfid, &oplock, NULL,
422 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 575 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
423 CIFS_MOUNT_MAP_SPECIAL_CHR); 576 CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -426,6 +579,7 @@ reopen_error_exit:
426 cFYI(1, ("cifs_open returned 0x%x", rc)); 579 cFYI(1, ("cifs_open returned 0x%x", rc));
427 cFYI(1, ("oplock: %d", oplock)); 580 cFYI(1, ("oplock: %d", oplock));
428 } else { 581 } else {
582reopen_success:
429 pCifsFile->netfid = netfid; 583 pCifsFile->netfid = netfid;
430 pCifsFile->invalidHandle = false; 584 pCifsFile->invalidHandle = false;
431 up(&pCifsFile->fh_sem); 585 up(&pCifsFile->fh_sem);
@@ -439,7 +593,7 @@ reopen_error_exit:
439 go to server to get inode info */ 593 go to server to get inode info */
440 pCifsInode->clientCanCacheAll = false; 594 pCifsInode->clientCanCacheAll = false;
441 pCifsInode->clientCanCacheRead = false; 595 pCifsInode->clientCanCacheRead = false;
442 if (pTcon->unix_ext) 596 if (tcon->unix_ext)
443 rc = cifs_get_inode_info_unix(&inode, 597 rc = cifs_get_inode_info_unix(&inode,
444 full_path, inode->i_sb, xid); 598 full_path, inode->i_sb, xid);
445 else 599 else
@@ -467,7 +621,6 @@ reopen_error_exit:
467 cifs_relock_file(pCifsFile); 621 cifs_relock_file(pCifsFile);
468 } 622 }
469 } 623 }
470
471 kfree(full_path); 624 kfree(full_path);
472 FreeXid(xid); 625 FreeXid(xid);
473 return rc; 626 return rc;
@@ -1523,6 +1676,9 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1523{ 1676{
1524 int xid; 1677 int xid;
1525 int rc = 0; 1678 int rc = 0;
1679 struct cifsTconInfo *tcon;
1680 struct cifsFileInfo *smbfile =
1681 (struct cifsFileInfo *)file->private_data;
1526 struct inode *inode = file->f_path.dentry->d_inode; 1682 struct inode *inode = file->f_path.dentry->d_inode;
1527 1683
1528 xid = GetXid(); 1684 xid = GetXid();
@@ -1534,7 +1690,12 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1534 if (rc == 0) { 1690 if (rc == 0) {
1535 rc = CIFS_I(inode)->write_behind_rc; 1691 rc = CIFS_I(inode)->write_behind_rc;
1536 CIFS_I(inode)->write_behind_rc = 0; 1692 CIFS_I(inode)->write_behind_rc = 0;
1693 tcon = CIFS_SB(inode->i_sb)->tcon;
1694 if (!rc && tcon && smbfile &&
1695 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1696 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1537 } 1697 }
1698
1538 FreeXid(xid); 1699 FreeXid(xid);
1539 return rc; 1700 return rc;
1540} 1701}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4690a360c855..a8797cc60805 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -763,6 +763,9 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
763 struct cifsTconInfo *pTcon = cifs_sb->tcon; 763 struct cifsTconInfo *pTcon = cifs_sb->tcon;
764 FILE_BASIC_INFO info_buf; 764 FILE_BASIC_INFO info_buf;
765 765
766 if (attrs == NULL)
767 return -EINVAL;
768
766 if (attrs->ia_valid & ATTR_ATIME) { 769 if (attrs->ia_valid & ATTR_ATIME) {
767 set_time = true; 770 set_time = true;
768 info_buf.LastAccessTime = 771 info_buf.LastAccessTime =
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
new file mode 100644
index 000000000000..7056b891e087
--- /dev/null
+++ b/fs/cifs/smbfsctl.h
@@ -0,0 +1,84 @@
1/*
2 * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
3 *
4 * Copyright (c) International Business Machines Corp., 2002,2009
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22/* IOCTL information */
23/*
24 * List of ioctl/fsctl function codes that are or could be useful in the
25 * future to remote clients like cifs or SMB2 client. There is probably
26 * a slightly larger set of fsctls that NTFS local filesystem could handle,
27 * including the seven below that we do not have struct definitions for.
28 * Even with protocol definitions for most of these now available, we still
29 * need to do some experimentation to identify which are practical to do
30 * remotely. Some of the following, such as the encryption/compression ones
31 * could be invoked from tools via a specialized hook into the VFS rather
32 * than via the standard vfs entry points
33 */
34#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
35#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
36#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
37#define FSCTL_LOCK_VOLUME 0x00090018
38#define FSCTL_UNLOCK_VOLUME 0x0009001C
39#define FSCTL_IS_PATHNAME_VALID 0x0009002C /* BB add struct */
40#define FSCTL_GET_COMPRESSION 0x0009003C /* BB add struct */
41#define FSCTL_SET_COMPRESSION 0x0009C040 /* BB add struct */
42#define FSCTL_QUERY_FAT_BPB 0x00090058 /* BB add struct */
43/* Verify the next FSCTL number, we had it as 0x00090090 before */
44#define FSCTL_FILESYSTEM_GET_STATS 0x00090060 /* BB add struct */
45#define FSCTL_GET_NTFS_VOLUME_DATA 0x00090064 /* BB add struct */
46#define FSCTL_GET_RETRIEVAL_POINTERS 0x00090073 /* BB add struct */
47#define FSCTL_IS_VOLUME_DIRTY 0x00090078 /* BB add struct */
48#define FSCTL_ALLOW_EXTENDED_DASD_IO 0x00090083 /* BB add struct */
49#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
50#define FSCTL_FIND_FILES_BY_SID 0x0009008F /* BB add struct */
51#define FSCTL_SET_OBJECT_ID 0x00090098 /* BB add struct */
52#define FSCTL_GET_OBJECT_ID 0x0009009C /* BB add struct */
53#define FSCTL_DELETE_OBJECT_ID 0x000900A0 /* BB add struct */
54#define FSCTL_SET_REPARSE_POINT 0x000900A4 /* BB add struct */
55#define FSCTL_GET_REPARSE_POINT 0x000900A8 /* BB add struct */
56#define FSCTL_DELETE_REPARSE_POINT 0x000900AC /* BB add struct */
57#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
58#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
59#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
60#define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */
61#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
62#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
63#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
64#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3 /* BB add struct */
65#define FSCTL_READ_FILE_USN_DATA 0x000900EB /* BB add struct */
66#define FSCTL_WRITE_USN_CLOSE_RECORD 0x000900EF /* BB add struct */
67#define FSCTL_SIS_COPYFILE 0x00090100 /* BB add struct */
68#define FSCTL_RECALL_FILE 0x00090117 /* BB add struct */
69#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
70#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
71#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
72#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
73#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
74#define FSCTL_SIS_LINK_FILES 0x0009C104
75#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
76#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
77/* strange that the number for this op is not sequential with previous op */
78#define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */
79#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
80#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
81
82#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
83#define IO_REPARSE_TAG_HSM 0xC0000004
84#define IO_REPARSE_TAG_SIS 0x80000007
diff --git a/fs/compat.c b/fs/compat.c
index d0145ca27572..0949b43794a4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1402,6 +1402,7 @@ int compat_do_execve(char * filename,
1402 retval = mutex_lock_interruptible(&current->cred_exec_mutex); 1402 retval = mutex_lock_interruptible(&current->cred_exec_mutex);
1403 if (retval < 0) 1403 if (retval < 0)
1404 goto out_free; 1404 goto out_free;
1405 current->in_execve = 1;
1405 1406
1406 retval = -ENOMEM; 1407 retval = -ENOMEM;
1407 bprm->cred = prepare_exec_creds(); 1408 bprm->cred = prepare_exec_creds();
@@ -1454,6 +1455,7 @@ int compat_do_execve(char * filename,
1454 goto out; 1455 goto out;
1455 1456
1456 /* execve succeeded */ 1457 /* execve succeeded */
1458 current->in_execve = 0;
1457 mutex_unlock(&current->cred_exec_mutex); 1459 mutex_unlock(&current->cred_exec_mutex);
1458 acct_update_integrals(current); 1460 acct_update_integrals(current);
1459 free_bprm(bprm); 1461 free_bprm(bprm);
@@ -1470,6 +1472,7 @@ out_file:
1470 } 1472 }
1471 1473
1472out_unlock: 1474out_unlock:
1475 current->in_execve = 0;
1473 mutex_unlock(&current->cred_exec_mutex); 1476 mutex_unlock(&current->cred_exec_mutex);
1474 1477
1475out_free: 1478out_free:
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 45e59d3c7f1f..ff786687e93b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -522,6 +522,11 @@ static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg)
522 if (err) 522 if (err)
523 return -EFAULT; 523 return -EFAULT;
524 break; 524 break;
525 case SIOCSHWTSTAMP:
526 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
527 return -EFAULT;
528 ifr.ifr_data = compat_ptr(uifr32->ifr_ifru.ifru_data);
529 break;
525 default: 530 default:
526 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32))) 531 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
527 return -EFAULT; 532 return -EFAULT;
@@ -1993,6 +1998,8 @@ COMPATIBLE_IOCTL(TUNSETGROUP)
1993COMPATIBLE_IOCTL(TUNGETFEATURES) 1998COMPATIBLE_IOCTL(TUNGETFEATURES)
1994COMPATIBLE_IOCTL(TUNSETOFFLOAD) 1999COMPATIBLE_IOCTL(TUNSETOFFLOAD)
1995COMPATIBLE_IOCTL(TUNSETTXFILTER) 2000COMPATIBLE_IOCTL(TUNSETTXFILTER)
2001COMPATIBLE_IOCTL(TUNGETSNDBUF)
2002COMPATIBLE_IOCTL(TUNSETSNDBUF)
1996/* Big V */ 2003/* Big V */
1997COMPATIBLE_IOCTL(VT_SETMODE) 2004COMPATIBLE_IOCTL(VT_SETMODE)
1998COMPATIBLE_IOCTL(VT_GETMODE) 2005COMPATIBLE_IOCTL(VT_GETMODE)
@@ -2566,6 +2573,7 @@ HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
2566HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc) 2573HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
2567HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc) 2574HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
2568HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc) 2575HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
2576HANDLE_IOCTL(SIOCSHWTSTAMP, dev_ifsioc)
2569 2577
2570/* ioctls used by appletalk ddp.c */ 2578/* ioctls used by appletalk ddp.c */
2571HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc) 2579HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 92969f879a17..858fba14aaa6 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -156,7 +156,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen
156 156
157 bucket = dir_hash(ls, name, namelen); 157 bucket = dir_hash(ls, name, namelen);
158 158
159 write_lock(&ls->ls_dirtbl[bucket].lock); 159 spin_lock(&ls->ls_dirtbl[bucket].lock);
160 160
161 de = search_bucket(ls, name, namelen, bucket); 161 de = search_bucket(ls, name, namelen, bucket);
162 162
@@ -173,7 +173,7 @@ void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen
173 list_del(&de->list); 173 list_del(&de->list);
174 kfree(de); 174 kfree(de);
175 out: 175 out:
176 write_unlock(&ls->ls_dirtbl[bucket].lock); 176 spin_unlock(&ls->ls_dirtbl[bucket].lock);
177} 177}
178 178
179void dlm_dir_clear(struct dlm_ls *ls) 179void dlm_dir_clear(struct dlm_ls *ls)
@@ -185,14 +185,14 @@ void dlm_dir_clear(struct dlm_ls *ls)
185 DLM_ASSERT(list_empty(&ls->ls_recover_list), ); 185 DLM_ASSERT(list_empty(&ls->ls_recover_list), );
186 186
187 for (i = 0; i < ls->ls_dirtbl_size; i++) { 187 for (i = 0; i < ls->ls_dirtbl_size; i++) {
188 write_lock(&ls->ls_dirtbl[i].lock); 188 spin_lock(&ls->ls_dirtbl[i].lock);
189 head = &ls->ls_dirtbl[i].list; 189 head = &ls->ls_dirtbl[i].list;
190 while (!list_empty(head)) { 190 while (!list_empty(head)) {
191 de = list_entry(head->next, struct dlm_direntry, list); 191 de = list_entry(head->next, struct dlm_direntry, list);
192 list_del(&de->list); 192 list_del(&de->list);
193 put_free_de(ls, de); 193 put_free_de(ls, de);
194 } 194 }
195 write_unlock(&ls->ls_dirtbl[i].lock); 195 spin_unlock(&ls->ls_dirtbl[i].lock);
196 } 196 }
197} 197}
198 198
@@ -307,17 +307,17 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
307 307
308 bucket = dir_hash(ls, name, namelen); 308 bucket = dir_hash(ls, name, namelen);
309 309
310 write_lock(&ls->ls_dirtbl[bucket].lock); 310 spin_lock(&ls->ls_dirtbl[bucket].lock);
311 de = search_bucket(ls, name, namelen, bucket); 311 de = search_bucket(ls, name, namelen, bucket);
312 if (de) { 312 if (de) {
313 *r_nodeid = de->master_nodeid; 313 *r_nodeid = de->master_nodeid;
314 write_unlock(&ls->ls_dirtbl[bucket].lock); 314 spin_unlock(&ls->ls_dirtbl[bucket].lock);
315 if (*r_nodeid == nodeid) 315 if (*r_nodeid == nodeid)
316 return -EEXIST; 316 return -EEXIST;
317 return 0; 317 return 0;
318 } 318 }
319 319
320 write_unlock(&ls->ls_dirtbl[bucket].lock); 320 spin_unlock(&ls->ls_dirtbl[bucket].lock);
321 321
322 if (namelen > DLM_RESNAME_MAXLEN) 322 if (namelen > DLM_RESNAME_MAXLEN)
323 return -EINVAL; 323 return -EINVAL;
@@ -330,7 +330,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
330 de->length = namelen; 330 de->length = namelen;
331 memcpy(de->name, name, namelen); 331 memcpy(de->name, name, namelen);
332 332
333 write_lock(&ls->ls_dirtbl[bucket].lock); 333 spin_lock(&ls->ls_dirtbl[bucket].lock);
334 tmp = search_bucket(ls, name, namelen, bucket); 334 tmp = search_bucket(ls, name, namelen, bucket);
335 if (tmp) { 335 if (tmp) {
336 kfree(de); 336 kfree(de);
@@ -339,7 +339,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
339 list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); 339 list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
340 } 340 }
341 *r_nodeid = de->master_nodeid; 341 *r_nodeid = de->master_nodeid;
342 write_unlock(&ls->ls_dirtbl[bucket].lock); 342 spin_unlock(&ls->ls_dirtbl[bucket].lock);
343 return 0; 343 return 0;
344} 344}
345 345
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 076e86f38bc8..d01ca0a711db 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -99,7 +99,7 @@ struct dlm_direntry {
99 99
100struct dlm_dirtable { 100struct dlm_dirtable {
101 struct list_head list; 101 struct list_head list;
102 rwlock_t lock; 102 spinlock_t lock;
103}; 103};
104 104
105struct dlm_rsbtable { 105struct dlm_rsbtable {
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 01e7d39c5fba..205ec95b347e 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -835,7 +835,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
835 lkb->lkb_wait_count++; 835 lkb->lkb_wait_count++;
836 hold_lkb(lkb); 836 hold_lkb(lkb);
837 837
838 log_debug(ls, "add overlap %x cur %d new %d count %d flags %x", 838 log_debug(ls, "addwait %x cur %d overlap %d count %d f %x",
839 lkb->lkb_id, lkb->lkb_wait_type, mstype, 839 lkb->lkb_id, lkb->lkb_wait_type, mstype,
840 lkb->lkb_wait_count, lkb->lkb_flags); 840 lkb->lkb_wait_count, lkb->lkb_flags);
841 goto out; 841 goto out;
@@ -851,7 +851,7 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
851 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); 851 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
852 out: 852 out:
853 if (error) 853 if (error)
854 log_error(ls, "add_to_waiters %x error %d flags %x %d %d %s", 854 log_error(ls, "addwait error %x %d flags %x %d %d %s",
855 lkb->lkb_id, error, lkb->lkb_flags, mstype, 855 lkb->lkb_id, error, lkb->lkb_flags, mstype,
856 lkb->lkb_wait_type, lkb->lkb_resource->res_name); 856 lkb->lkb_wait_type, lkb->lkb_resource->res_name);
857 mutex_unlock(&ls->ls_waiters_mutex); 857 mutex_unlock(&ls->ls_waiters_mutex);
@@ -863,23 +863,55 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
863 request reply on the requestqueue) between dlm_recover_waiters_pre() which 863 request reply on the requestqueue) between dlm_recover_waiters_pre() which
864 set RESEND and dlm_recover_waiters_post() */ 864 set RESEND and dlm_recover_waiters_post() */
865 865
866static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype) 866static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
867 struct dlm_message *ms)
867{ 868{
868 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 869 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
869 int overlap_done = 0; 870 int overlap_done = 0;
870 871
871 if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) { 872 if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) {
873 log_debug(ls, "remwait %x unlock_reply overlap", lkb->lkb_id);
872 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK; 874 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
873 overlap_done = 1; 875 overlap_done = 1;
874 goto out_del; 876 goto out_del;
875 } 877 }
876 878
877 if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) { 879 if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) {
880 log_debug(ls, "remwait %x cancel_reply overlap", lkb->lkb_id);
878 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL; 881 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
879 overlap_done = 1; 882 overlap_done = 1;
880 goto out_del; 883 goto out_del;
881 } 884 }
882 885
886 /* Cancel state was preemptively cleared by a successful convert,
887 see next comment, nothing to do. */
888
889 if ((mstype == DLM_MSG_CANCEL_REPLY) &&
890 (lkb->lkb_wait_type != DLM_MSG_CANCEL)) {
891 log_debug(ls, "remwait %x cancel_reply wait_type %d",
892 lkb->lkb_id, lkb->lkb_wait_type);
893 return -1;
894 }
895
896 /* Remove for the convert reply, and premptively remove for the
897 cancel reply. A convert has been granted while there's still
898 an outstanding cancel on it (the cancel is moot and the result
899 in the cancel reply should be 0). We preempt the cancel reply
900 because the app gets the convert result and then can follow up
901 with another op, like convert. This subsequent op would see the
902 lingering state of the cancel and fail with -EBUSY. */
903
904 if ((mstype == DLM_MSG_CONVERT_REPLY) &&
905 (lkb->lkb_wait_type == DLM_MSG_CONVERT) &&
906 is_overlap_cancel(lkb) && ms && !ms->m_result) {
907 log_debug(ls, "remwait %x convert_reply zap overlap_cancel",
908 lkb->lkb_id);
909 lkb->lkb_wait_type = 0;
910 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
911 lkb->lkb_wait_count--;
912 goto out_del;
913 }
914
883 /* N.B. type of reply may not always correspond to type of original 915 /* N.B. type of reply may not always correspond to type of original
884 msg due to lookup->request optimization, verify others? */ 916 msg due to lookup->request optimization, verify others? */
885 917
@@ -888,8 +920,8 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype)
888 goto out_del; 920 goto out_del;
889 } 921 }
890 922
891 log_error(ls, "remove_from_waiters lkid %x flags %x types %d %d", 923 log_error(ls, "remwait error %x reply %d flags %x no wait_type",
892 lkb->lkb_id, lkb->lkb_flags, mstype, lkb->lkb_wait_type); 924 lkb->lkb_id, mstype, lkb->lkb_flags);
893 return -1; 925 return -1;
894 926
895 out_del: 927 out_del:
@@ -899,7 +931,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype)
899 this would happen */ 931 this would happen */
900 932
901 if (overlap_done && lkb->lkb_wait_type) { 933 if (overlap_done && lkb->lkb_wait_type) {
902 log_error(ls, "remove_from_waiters %x reply %d give up on %d", 934 log_error(ls, "remwait error %x reply %d wait_type %d overlap",
903 lkb->lkb_id, mstype, lkb->lkb_wait_type); 935 lkb->lkb_id, mstype, lkb->lkb_wait_type);
904 lkb->lkb_wait_count--; 936 lkb->lkb_wait_count--;
905 lkb->lkb_wait_type = 0; 937 lkb->lkb_wait_type = 0;
@@ -921,7 +953,7 @@ static int remove_from_waiters(struct dlm_lkb *lkb, int mstype)
921 int error; 953 int error;
922 954
923 mutex_lock(&ls->ls_waiters_mutex); 955 mutex_lock(&ls->ls_waiters_mutex);
924 error = _remove_from_waiters(lkb, mstype); 956 error = _remove_from_waiters(lkb, mstype, NULL);
925 mutex_unlock(&ls->ls_waiters_mutex); 957 mutex_unlock(&ls->ls_waiters_mutex);
926 return error; 958 return error;
927} 959}
@@ -936,7 +968,7 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
936 968
937 if (ms != &ls->ls_stub_ms) 969 if (ms != &ls->ls_stub_ms)
938 mutex_lock(&ls->ls_waiters_mutex); 970 mutex_lock(&ls->ls_waiters_mutex);
939 error = _remove_from_waiters(lkb, ms->m_type); 971 error = _remove_from_waiters(lkb, ms->m_type, ms);
940 if (ms != &ls->ls_stub_ms) 972 if (ms != &ls->ls_stub_ms)
941 mutex_unlock(&ls->ls_waiters_mutex); 973 mutex_unlock(&ls->ls_waiters_mutex);
942 return error; 974 return error;
@@ -2083,6 +2115,11 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2083 lkb->lkb_timeout_cs = args->timeout; 2115 lkb->lkb_timeout_cs = args->timeout;
2084 rv = 0; 2116 rv = 0;
2085 out: 2117 out:
2118 if (rv)
2119 log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s",
2120 rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
2121 lkb->lkb_status, lkb->lkb_wait_type,
2122 lkb->lkb_resource->res_name);
2086 return rv; 2123 return rv;
2087} 2124}
2088 2125
@@ -2149,6 +2186,13 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
2149 goto out; 2186 goto out;
2150 } 2187 }
2151 2188
2189 /* there's nothing to cancel */
2190 if (lkb->lkb_status == DLM_LKSTS_GRANTED &&
2191 !lkb->lkb_wait_type) {
2192 rv = -EBUSY;
2193 goto out;
2194 }
2195
2152 switch (lkb->lkb_wait_type) { 2196 switch (lkb->lkb_wait_type) {
2153 case DLM_MSG_LOOKUP: 2197 case DLM_MSG_LOOKUP:
2154 case DLM_MSG_REQUEST: 2198 case DLM_MSG_REQUEST:
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index aa32e5f02493..cd8e2df3c295 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -487,7 +487,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
487 goto out_lkbfree; 487 goto out_lkbfree;
488 for (i = 0; i < size; i++) { 488 for (i = 0; i < size; i++) {
489 INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); 489 INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
490 rwlock_init(&ls->ls_dirtbl[i].lock); 490 spin_lock_init(&ls->ls_dirtbl[i].lock);
491 } 491 }
492 492
493 INIT_LIST_HEAD(&ls->ls_waiters); 493 INIT_LIST_HEAD(&ls->ls_waiters);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 103a5ebd1371..609108a83267 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -2,7 +2,7 @@
2******************************************************************************* 2*******************************************************************************
3** 3**
4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 5** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6** 6**
7** This copyrighted material is made available to anyone wishing to use, 7** This copyrighted material is made available to anyone wishing to use,
8** modify, copy, or redistribute it subject to the terms and conditions 8** modify, copy, or redistribute it subject to the terms and conditions
@@ -21,7 +21,7 @@
21 * 21 *
22 * Cluster nodes are referred to by their nodeids. nodeids are 22 * Cluster nodes are referred to by their nodeids. nodeids are
23 * simply 32 bit numbers to the locking module - if they need to 23 * simply 32 bit numbers to the locking module - if they need to
24 * be expanded for the cluster infrastructure then that is it's 24 * be expanded for the cluster infrastructure then that is its
25 * responsibility. It is this layer's 25 * responsibility. It is this layer's
26 * responsibility to resolve these into IP address or 26 * responsibility to resolve these into IP address or
27 * whatever it needs for inter-node communication. 27 * whatever it needs for inter-node communication.
@@ -36,9 +36,9 @@
36 * of high load. Also, this way, the sending thread can collect together 36 * of high load. Also, this way, the sending thread can collect together
37 * messages bound for one node and send them in one block. 37 * messages bound for one node and send them in one block.
38 * 38 *
39 * lowcomms will choose to use wither TCP or SCTP as its transport layer 39 * lowcomms will choose to use either TCP or SCTP as its transport layer
40 * depending on the configuration variable 'protocol'. This should be set 40 * depending on the configuration variable 'protocol'. This should be set
41 * to 0 (default) for TCP or 1 for SCTP. It shouldbe configured using a 41 * to 0 (default) for TCP or 1 for SCTP. It should be configured using a
42 * cluster-wide mechanism as it must be the same on all nodes of the cluster 42 * cluster-wide mechanism as it must be the same on all nodes of the cluster
43 * for the DLM to function. 43 * for the DLM to function.
44 * 44 *
@@ -48,11 +48,11 @@
48#include <net/sock.h> 48#include <net/sock.h>
49#include <net/tcp.h> 49#include <net/tcp.h>
50#include <linux/pagemap.h> 50#include <linux/pagemap.h>
51#include <linux/idr.h>
52#include <linux/file.h> 51#include <linux/file.h>
53#include <linux/mutex.h> 52#include <linux/mutex.h>
54#include <linux/sctp.h> 53#include <linux/sctp.h>
55#include <net/sctp/user.h> 54#include <net/sctp/user.h>
55#include <net/ipv6.h>
56 56
57#include "dlm_internal.h" 57#include "dlm_internal.h"
58#include "lowcomms.h" 58#include "lowcomms.h"
@@ -60,6 +60,7 @@
60#include "config.h" 60#include "config.h"
61 61
62#define NEEDED_RMEM (4*1024*1024) 62#define NEEDED_RMEM (4*1024*1024)
63#define CONN_HASH_SIZE 32
63 64
64struct cbuf { 65struct cbuf {
65 unsigned int base; 66 unsigned int base;
@@ -114,6 +115,7 @@ struct connection {
114 int retries; 115 int retries;
115#define MAX_CONNECT_RETRIES 3 116#define MAX_CONNECT_RETRIES 3
116 int sctp_assoc; 117 int sctp_assoc;
118 struct hlist_node list;
117 struct connection *othercon; 119 struct connection *othercon;
118 struct work_struct rwork; /* Receive workqueue */ 120 struct work_struct rwork; /* Receive workqueue */
119 struct work_struct swork; /* Send workqueue */ 121 struct work_struct swork; /* Send workqueue */
@@ -138,14 +140,37 @@ static int dlm_local_count;
138static struct workqueue_struct *recv_workqueue; 140static struct workqueue_struct *recv_workqueue;
139static struct workqueue_struct *send_workqueue; 141static struct workqueue_struct *send_workqueue;
140 142
141static DEFINE_IDR(connections_idr); 143static struct hlist_head connection_hash[CONN_HASH_SIZE];
142static DEFINE_MUTEX(connections_lock); 144static DEFINE_MUTEX(connections_lock);
143static int max_nodeid;
144static struct kmem_cache *con_cache; 145static struct kmem_cache *con_cache;
145 146
146static void process_recv_sockets(struct work_struct *work); 147static void process_recv_sockets(struct work_struct *work);
147static void process_send_sockets(struct work_struct *work); 148static void process_send_sockets(struct work_struct *work);
148 149
150
151/* This is deliberately very simple because most clusters have simple
152 sequential nodeids, so we should be able to go straight to a connection
153 struct in the array */
154static inline int nodeid_hash(int nodeid)
155{
156 return nodeid & (CONN_HASH_SIZE-1);
157}
158
159static struct connection *__find_con(int nodeid)
160{
161 int r;
162 struct hlist_node *h;
163 struct connection *con;
164
165 r = nodeid_hash(nodeid);
166
167 hlist_for_each_entry(con, h, &connection_hash[r], list) {
168 if (con->nodeid == nodeid)
169 return con;
170 }
171 return NULL;
172}
173
149/* 174/*
150 * If 'allocation' is zero then we don't attempt to create a new 175 * If 'allocation' is zero then we don't attempt to create a new
151 * connection structure for this node. 176 * connection structure for this node.
@@ -154,31 +179,17 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
154{ 179{
155 struct connection *con = NULL; 180 struct connection *con = NULL;
156 int r; 181 int r;
157 int n;
158 182
159 con = idr_find(&connections_idr, nodeid); 183 con = __find_con(nodeid);
160 if (con || !alloc) 184 if (con || !alloc)
161 return con; 185 return con;
162 186
163 r = idr_pre_get(&connections_idr, alloc);
164 if (!r)
165 return NULL;
166
167 con = kmem_cache_zalloc(con_cache, alloc); 187 con = kmem_cache_zalloc(con_cache, alloc);
168 if (!con) 188 if (!con)
169 return NULL; 189 return NULL;
170 190
171 r = idr_get_new_above(&connections_idr, con, nodeid, &n); 191 r = nodeid_hash(nodeid);
172 if (r) { 192 hlist_add_head(&con->list, &connection_hash[r]);
173 kmem_cache_free(con_cache, con);
174 return NULL;
175 }
176
177 if (n != nodeid) {
178 idr_remove(&connections_idr, n);
179 kmem_cache_free(con_cache, con);
180 return NULL;
181 }
182 193
183 con->nodeid = nodeid; 194 con->nodeid = nodeid;
184 mutex_init(&con->sock_mutex); 195 mutex_init(&con->sock_mutex);
@@ -189,19 +200,30 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
189 200
190 /* Setup action pointers for child sockets */ 201 /* Setup action pointers for child sockets */
191 if (con->nodeid) { 202 if (con->nodeid) {
192 struct connection *zerocon = idr_find(&connections_idr, 0); 203 struct connection *zerocon = __find_con(0);
193 204
194 con->connect_action = zerocon->connect_action; 205 con->connect_action = zerocon->connect_action;
195 if (!con->rx_action) 206 if (!con->rx_action)
196 con->rx_action = zerocon->rx_action; 207 con->rx_action = zerocon->rx_action;
197 } 208 }
198 209
199 if (nodeid > max_nodeid)
200 max_nodeid = nodeid;
201
202 return con; 210 return con;
203} 211}
204 212
213/* Loop round all connections */
214static void foreach_conn(void (*conn_func)(struct connection *c))
215{
216 int i;
217 struct hlist_node *h, *n;
218 struct connection *con;
219
220 for (i = 0; i < CONN_HASH_SIZE; i++) {
221 hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){
222 conn_func(con);
223 }
224 }
225}
226
205static struct connection *nodeid2con(int nodeid, gfp_t allocation) 227static struct connection *nodeid2con(int nodeid, gfp_t allocation)
206{ 228{
207 struct connection *con; 229 struct connection *con;
@@ -217,14 +239,17 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
217static struct connection *assoc2con(int assoc_id) 239static struct connection *assoc2con(int assoc_id)
218{ 240{
219 int i; 241 int i;
242 struct hlist_node *h;
220 struct connection *con; 243 struct connection *con;
221 244
222 mutex_lock(&connections_lock); 245 mutex_lock(&connections_lock);
223 for (i=0; i<=max_nodeid; i++) { 246
224 con = __nodeid2con(i, 0); 247 for (i = 0 ; i < CONN_HASH_SIZE; i++) {
225 if (con && con->sctp_assoc == assoc_id) { 248 hlist_for_each_entry(con, h, &connection_hash[i], list) {
226 mutex_unlock(&connections_lock); 249 if (con && con->sctp_assoc == assoc_id) {
227 return con; 250 mutex_unlock(&connections_lock);
251 return con;
252 }
228 } 253 }
229 } 254 }
230 mutex_unlock(&connections_lock); 255 mutex_unlock(&connections_lock);
@@ -250,8 +275,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
250 } else { 275 } else {
251 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; 276 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr;
252 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; 277 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
253 memcpy(&ret6->sin6_addr, &in6->sin6_addr, 278 ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr);
254 sizeof(in6->sin6_addr));
255 } 279 }
256 280
257 return 0; 281 return 0;
@@ -376,25 +400,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd)
376 log_print("send EOF to node failed: %d", ret); 400 log_print("send EOF to node failed: %d", ret);
377} 401}
378 402
403static void sctp_init_failed_foreach(struct connection *con)
404{
405 con->sctp_assoc = 0;
406 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
407 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
408 queue_work(send_workqueue, &con->swork);
409 }
410}
411
379/* INIT failed but we don't know which node... 412/* INIT failed but we don't know which node...
380 restart INIT on all pending nodes */ 413 restart INIT on all pending nodes */
381static void sctp_init_failed(void) 414static void sctp_init_failed(void)
382{ 415{
383 int i;
384 struct connection *con;
385
386 mutex_lock(&connections_lock); 416 mutex_lock(&connections_lock);
387 for (i=1; i<=max_nodeid; i++) { 417
388 con = __nodeid2con(i, 0); 418 foreach_conn(sctp_init_failed_foreach);
389 if (!con) 419
390 continue;
391 con->sctp_assoc = 0;
392 if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
393 if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
394 queue_work(send_workqueue, &con->swork);
395 }
396 }
397 }
398 mutex_unlock(&connections_lock); 420 mutex_unlock(&connections_lock);
399} 421}
400 422
@@ -1313,13 +1335,10 @@ out_connect:
1313 1335
1314static void clean_one_writequeue(struct connection *con) 1336static void clean_one_writequeue(struct connection *con)
1315{ 1337{
1316 struct list_head *list; 1338 struct writequeue_entry *e, *safe;
1317 struct list_head *temp;
1318 1339
1319 spin_lock(&con->writequeue_lock); 1340 spin_lock(&con->writequeue_lock);
1320 list_for_each_safe(list, temp, &con->writequeue) { 1341 list_for_each_entry_safe(e, safe, &con->writequeue, list) {
1321 struct writequeue_entry *e =
1322 list_entry(list, struct writequeue_entry, list);
1323 list_del(&e->list); 1342 list_del(&e->list);
1324 free_entry(e); 1343 free_entry(e);
1325 } 1344 }
@@ -1369,14 +1388,7 @@ static void process_send_sockets(struct work_struct *work)
1369/* Discard all entries on the write queues */ 1388/* Discard all entries on the write queues */
1370static void clean_writequeues(void) 1389static void clean_writequeues(void)
1371{ 1390{
1372 int nodeid; 1391 foreach_conn(clean_one_writequeue);
1373
1374 for (nodeid = 1; nodeid <= max_nodeid; nodeid++) {
1375 struct connection *con = __nodeid2con(nodeid, 0);
1376
1377 if (con)
1378 clean_one_writequeue(con);
1379 }
1380} 1392}
1381 1393
1382static void work_stop(void) 1394static void work_stop(void)
@@ -1406,23 +1418,29 @@ static int work_start(void)
1406 return 0; 1418 return 0;
1407} 1419}
1408 1420
1409void dlm_lowcomms_stop(void) 1421static void stop_conn(struct connection *con)
1410{ 1422{
1411 int i; 1423 con->flags |= 0x0F;
1412 struct connection *con; 1424 if (con->sock)
1425 con->sock->sk->sk_user_data = NULL;
1426}
1413 1427
1428static void free_conn(struct connection *con)
1429{
1430 close_connection(con, true);
1431 if (con->othercon)
1432 kmem_cache_free(con_cache, con->othercon);
1433 hlist_del(&con->list);
1434 kmem_cache_free(con_cache, con);
1435}
1436
1437void dlm_lowcomms_stop(void)
1438{
1414 /* Set all the flags to prevent any 1439 /* Set all the flags to prevent any
1415 socket activity. 1440 socket activity.
1416 */ 1441 */
1417 mutex_lock(&connections_lock); 1442 mutex_lock(&connections_lock);
1418 for (i = 0; i <= max_nodeid; i++) { 1443 foreach_conn(stop_conn);
1419 con = __nodeid2con(i, 0);
1420 if (con) {
1421 con->flags |= 0x0F;
1422 if (con->sock)
1423 con->sock->sk->sk_user_data = NULL;
1424 }
1425 }
1426 mutex_unlock(&connections_lock); 1444 mutex_unlock(&connections_lock);
1427 1445
1428 work_stop(); 1446 work_stop();
@@ -1430,25 +1448,20 @@ void dlm_lowcomms_stop(void)
1430 mutex_lock(&connections_lock); 1448 mutex_lock(&connections_lock);
1431 clean_writequeues(); 1449 clean_writequeues();
1432 1450
1433 for (i = 0; i <= max_nodeid; i++) { 1451 foreach_conn(free_conn);
1434 con = __nodeid2con(i, 0); 1452
1435 if (con) {
1436 close_connection(con, true);
1437 if (con->othercon)
1438 kmem_cache_free(con_cache, con->othercon);
1439 kmem_cache_free(con_cache, con);
1440 }
1441 }
1442 max_nodeid = 0;
1443 mutex_unlock(&connections_lock); 1453 mutex_unlock(&connections_lock);
1444 kmem_cache_destroy(con_cache); 1454 kmem_cache_destroy(con_cache);
1445 idr_init(&connections_idr);
1446} 1455}
1447 1456
1448int dlm_lowcomms_start(void) 1457int dlm_lowcomms_start(void)
1449{ 1458{
1450 int error = -EINVAL; 1459 int error = -EINVAL;
1451 struct connection *con; 1460 struct connection *con;
1461 int i;
1462
1463 for (i = 0; i < CONN_HASH_SIZE; i++)
1464 INIT_HLIST_HEAD(&connection_hash[i]);
1452 1465
1453 init_local(); 1466 init_local();
1454 if (!dlm_local_count) { 1467 if (!dlm_local_count) {
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 065149e84f42..ebce994ab0b7 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. 2 * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved.
3 * 3 *
4 * This copyrighted material is made available to anyone wishing to use, 4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions 5 * modify, copy, or redistribute it subject to the terms and conditions
@@ -84,7 +84,7 @@ struct dlm_lock_result32 {
84 84
85static void compat_input(struct dlm_write_request *kb, 85static void compat_input(struct dlm_write_request *kb,
86 struct dlm_write_request32 *kb32, 86 struct dlm_write_request32 *kb32,
87 size_t count) 87 int namelen)
88{ 88{
89 kb->version[0] = kb32->version[0]; 89 kb->version[0] = kb32->version[0];
90 kb->version[1] = kb32->version[1]; 90 kb->version[1] = kb32->version[1];
@@ -96,8 +96,7 @@ static void compat_input(struct dlm_write_request *kb,
96 kb->cmd == DLM_USER_REMOVE_LOCKSPACE) { 96 kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
97 kb->i.lspace.flags = kb32->i.lspace.flags; 97 kb->i.lspace.flags = kb32->i.lspace.flags;
98 kb->i.lspace.minor = kb32->i.lspace.minor; 98 kb->i.lspace.minor = kb32->i.lspace.minor;
99 memcpy(kb->i.lspace.name, kb32->i.lspace.name, count - 99 memcpy(kb->i.lspace.name, kb32->i.lspace.name, namelen);
100 offsetof(struct dlm_write_request32, i.lspace.name));
101 } else if (kb->cmd == DLM_USER_PURGE) { 100 } else if (kb->cmd == DLM_USER_PURGE) {
102 kb->i.purge.nodeid = kb32->i.purge.nodeid; 101 kb->i.purge.nodeid = kb32->i.purge.nodeid;
103 kb->i.purge.pid = kb32->i.purge.pid; 102 kb->i.purge.pid = kb32->i.purge.pid;
@@ -115,8 +114,7 @@ static void compat_input(struct dlm_write_request *kb,
115 kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr; 114 kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
116 kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb; 115 kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
117 memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN); 116 memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
118 memcpy(kb->i.lock.name, kb32->i.lock.name, count - 117 memcpy(kb->i.lock.name, kb32->i.lock.name, namelen);
119 offsetof(struct dlm_write_request32, i.lock.name));
120 } 118 }
121} 119}
122 120
@@ -539,9 +537,16 @@ static ssize_t device_write(struct file *file, const char __user *buf,
539#ifdef CONFIG_COMPAT 537#ifdef CONFIG_COMPAT
540 if (!kbuf->is64bit) { 538 if (!kbuf->is64bit) {
541 struct dlm_write_request32 *k32buf; 539 struct dlm_write_request32 *k32buf;
540 int namelen = 0;
541
542 if (count > sizeof(struct dlm_write_request32))
543 namelen = count - sizeof(struct dlm_write_request32);
544
542 k32buf = (struct dlm_write_request32 *)kbuf; 545 k32buf = (struct dlm_write_request32 *)kbuf;
543 kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - 546
544 sizeof(struct dlm_write_request32)), GFP_KERNEL); 547 /* add 1 after namelen so that the name string is terminated */
548 kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1,
549 GFP_KERNEL);
545 if (!kbuf) { 550 if (!kbuf) {
546 kfree(k32buf); 551 kfree(k32buf);
547 return -ENOMEM; 552 return -ENOMEM;
@@ -549,7 +554,8 @@ static ssize_t device_write(struct file *file, const char __user *buf,
549 554
550 if (proc) 555 if (proc)
551 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); 556 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
552 compat_input(kbuf, k32buf, count + 1); 557
558 compat_input(kbuf, k32buf, namelen);
553 kfree(k32buf); 559 kfree(k32buf);
554 } 560 }
555#endif 561#endif
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 011b9b8c90c6..c5c424f23fd5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -417,10 +417,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
417 ep_unregister_pollwait(ep, epi); 417 ep_unregister_pollwait(ep, epi);
418 418
419 /* Remove the current item from the list of epoll hooks */ 419 /* Remove the current item from the list of epoll hooks */
420 spin_lock(&file->f_ep_lock); 420 spin_lock(&file->f_lock);
421 if (ep_is_linked(&epi->fllink)) 421 if (ep_is_linked(&epi->fllink))
422 list_del_init(&epi->fllink); 422 list_del_init(&epi->fllink);
423 spin_unlock(&file->f_ep_lock); 423 spin_unlock(&file->f_lock);
424 424
425 rb_erase(&epi->rbn, &ep->rbr); 425 rb_erase(&epi->rbn, &ep->rbr);
426 426
@@ -538,7 +538,7 @@ void eventpoll_release_file(struct file *file)
538 struct epitem *epi; 538 struct epitem *epi;
539 539
540 /* 540 /*
541 * We don't want to get "file->f_ep_lock" because it is not 541 * We don't want to get "file->f_lock" because it is not
542 * necessary. It is not necessary because we're in the "struct file" 542 * necessary. It is not necessary because we're in the "struct file"
543 * cleanup path, and this means that noone is using this file anymore. 543 * cleanup path, and this means that noone is using this file anymore.
544 * So, for example, epoll_ctl() cannot hit here sicne if we reach this 544 * So, for example, epoll_ctl() cannot hit here sicne if we reach this
@@ -547,6 +547,8 @@ void eventpoll_release_file(struct file *file)
547 * will correctly serialize the operation. We do need to acquire 547 * will correctly serialize the operation. We do need to acquire
548 * "ep->mtx" after "epmutex" because ep_remove() requires it when called 548 * "ep->mtx" after "epmutex" because ep_remove() requires it when called
549 * from anywhere but ep_free(). 549 * from anywhere but ep_free().
550 *
551 * Besides, ep_remove() acquires the lock, so we can't hold it here.
550 */ 552 */
551 mutex_lock(&epmutex); 553 mutex_lock(&epmutex);
552 554
@@ -785,9 +787,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
785 goto error_unregister; 787 goto error_unregister;
786 788
787 /* Add the current item to the list of active epoll hook for this file */ 789 /* Add the current item to the list of active epoll hook for this file */
788 spin_lock(&tfile->f_ep_lock); 790 spin_lock(&tfile->f_lock);
789 list_add_tail(&epi->fllink, &tfile->f_ep_links); 791 list_add_tail(&epi->fllink, &tfile->f_ep_links);
790 spin_unlock(&tfile->f_ep_lock); 792 spin_unlock(&tfile->f_lock);
791 793
792 /* 794 /*
793 * Add the current item to the RB tree. All RB tree operations are 795 * Add the current item to the RB tree. All RB tree operations are
diff --git a/fs/exec.c b/fs/exec.c
index 929b58004b7e..b9f1c144b7a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -45,6 +45,7 @@
45#include <linux/proc_fs.h> 45#include <linux/proc_fs.h>
46#include <linux/mount.h> 46#include <linux/mount.h>
47#include <linux/security.h> 47#include <linux/security.h>
48#include <linux/ima.h>
48#include <linux/syscalls.h> 49#include <linux/syscalls.h>
49#include <linux/tsacct_kern.h> 50#include <linux/tsacct_kern.h>
50#include <linux/cn_proc.h> 51#include <linux/cn_proc.h>
@@ -127,6 +128,9 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
127 MAY_READ | MAY_EXEC | MAY_OPEN); 128 MAY_READ | MAY_EXEC | MAY_OPEN);
128 if (error) 129 if (error)
129 goto exit; 130 goto exit;
131 error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN);
132 if (error)
133 goto exit;
130 134
131 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); 135 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
132 error = PTR_ERR(file); 136 error = PTR_ERR(file);
@@ -674,6 +678,9 @@ struct file *open_exec(const char *name)
674 err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); 678 err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
675 if (err) 679 if (err)
676 goto out_path_put; 680 goto out_path_put;
681 err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN);
682 if (err)
683 goto out_path_put;
677 684
678 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); 685 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
679 if (IS_ERR(file)) 686 if (IS_ERR(file))
@@ -1184,6 +1191,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1184 retval = security_bprm_check(bprm); 1191 retval = security_bprm_check(bprm);
1185 if (retval) 1192 if (retval)
1186 return retval; 1193 return retval;
1194 retval = ima_bprm_check(bprm);
1195 if (retval)
1196 return retval;
1187 1197
1188 /* kernel module loader fixup */ 1198 /* kernel module loader fixup */
1189 /* so we don't try to load run modprobe in kernel space. */ 1199 /* so we don't try to load run modprobe in kernel space. */
@@ -1284,6 +1294,7 @@ int do_execve(char * filename,
1284 retval = mutex_lock_interruptible(&current->cred_exec_mutex); 1294 retval = mutex_lock_interruptible(&current->cred_exec_mutex);
1285 if (retval < 0) 1295 if (retval < 0)
1286 goto out_free; 1296 goto out_free;
1297 current->in_execve = 1;
1287 1298
1288 retval = -ENOMEM; 1299 retval = -ENOMEM;
1289 bprm->cred = prepare_exec_creds(); 1300 bprm->cred = prepare_exec_creds();
@@ -1337,6 +1348,7 @@ int do_execve(char * filename,
1337 goto out; 1348 goto out;
1338 1349
1339 /* execve succeeded */ 1350 /* execve succeeded */
1351 current->in_execve = 0;
1340 mutex_unlock(&current->cred_exec_mutex); 1352 mutex_unlock(&current->cred_exec_mutex);
1341 acct_update_integrals(current); 1353 acct_update_integrals(current);
1342 free_bprm(bprm); 1354 free_bprm(bprm);
@@ -1355,6 +1367,7 @@ out_file:
1355 } 1367 }
1356 1368
1357out_unlock: 1369out_unlock:
1370 current->in_execve = 0;
1358 mutex_unlock(&current->cred_exec_mutex); 1371 mutex_unlock(&current->cred_exec_mutex);
1359 1372
1360out_free: 1373out_free:
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c8f9bd308821..4a09ff169870 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1435,6 +1435,10 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
1435 return 0; 1435 return 0;
1436} 1436}
1437 1437
1438static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
1439{
1440 return !buffer_mapped(bh);
1441}
1438/* 1442/*
1439 * Note that we always start a transaction even if we're not journalling 1443 * Note that we always start a transaction even if we're not journalling
1440 * data. This is to preserve ordering: any hole instantiation within 1444 * data. This is to preserve ordering: any hole instantiation within
@@ -1505,6 +1509,15 @@ static int ext3_ordered_writepage(struct page *page,
1505 if (ext3_journal_current_handle()) 1509 if (ext3_journal_current_handle())
1506 goto out_fail; 1510 goto out_fail;
1507 1511
1512 if (!page_has_buffers(page)) {
1513 create_empty_buffers(page, inode->i_sb->s_blocksize,
1514 (1 << BH_Dirty)|(1 << BH_Uptodate));
1515 } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
1516 /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */
1517 return block_write_full_page(page, NULL, wbc);
1518 }
1519 page_bufs = page_buffers(page);
1520
1508 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1521 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1509 1522
1510 if (IS_ERR(handle)) { 1523 if (IS_ERR(handle)) {
@@ -1512,11 +1525,6 @@ static int ext3_ordered_writepage(struct page *page,
1512 goto out_fail; 1525 goto out_fail;
1513 } 1526 }
1514 1527
1515 if (!page_has_buffers(page)) {
1516 create_empty_buffers(page, inode->i_sb->s_blocksize,
1517 (1 << BH_Dirty)|(1 << BH_Uptodate));
1518 }
1519 page_bufs = page_buffers(page);
1520 walk_page_buffers(handle, page_bufs, 0, 1528 walk_page_buffers(handle, page_bufs, 0,
1521 PAGE_CACHE_SIZE, NULL, bget_one); 1529 PAGE_CACHE_SIZE, NULL, bget_one);
1522 1530
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bd215cc791da..d865ca66ccba 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -141,7 +141,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
141 return ret; 141 return ret;
142} 142}
143 143
144#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) 144#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
145 145
146static int setfl(int fd, struct file * filp, unsigned long arg) 146static int setfl(int fd, struct file * filp, unsigned long arg)
147{ 147{
@@ -177,21 +177,21 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
177 return error; 177 return error;
178 178
179 /* 179 /*
180 * We still need a lock here for now to keep multiple FASYNC calls 180 * ->fasync() is responsible for setting the FASYNC bit.
181 * from racing with each other.
182 */ 181 */
183 lock_kernel(); 182 if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
184 if ((arg ^ filp->f_flags) & FASYNC) { 183 filp->f_op->fasync) {
185 if (filp->f_op && filp->f_op->fasync) { 184 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
186 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); 185 if (error < 0)
187 if (error < 0) 186 goto out;
188 goto out; 187 if (error > 0)
189 } 188 error = 0;
190 } 189 }
191 190 spin_lock(&filp->f_lock);
192 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); 191 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
192 spin_unlock(&filp->f_lock);
193
193 out: 194 out:
194 unlock_kernel();
195 return error; 195 return error;
196} 196}
197 197
@@ -516,7 +516,7 @@ static DEFINE_RWLOCK(fasync_lock);
516static struct kmem_cache *fasync_cache __read_mostly; 516static struct kmem_cache *fasync_cache __read_mostly;
517 517
518/* 518/*
519 * fasync_helper() is used by some character device drivers (mainly mice) 519 * fasync_helper() is used by almost all character device drivers
520 * to set up the fasync queue. It returns negative on error, 0 if it did 520 * to set up the fasync queue. It returns negative on error, 0 if it did
521 * no changes and positive if it added/deleted the entry. 521 * no changes and positive if it added/deleted the entry.
522 */ 522 */
@@ -555,6 +555,13 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
555 result = 1; 555 result = 1;
556 } 556 }
557out: 557out:
558 /* Fix up FASYNC bit while still holding fasync_lock */
559 spin_lock(&filp->f_lock);
560 if (on)
561 filp->f_flags |= FASYNC;
562 else
563 filp->f_flags &= ~FASYNC;
564 spin_unlock(&filp->f_lock);
558 write_unlock_irq(&fasync_lock); 565 write_unlock_irq(&fasync_lock);
559 return result; 566 return result;
560} 567}
diff --git a/fs/file_table.c b/fs/file_table.c
index bbeeac6efa1a..b74a8e1da913 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/ima.h>
16#include <linux/eventpoll.h> 17#include <linux/eventpoll.h>
17#include <linux/rcupdate.h> 18#include <linux/rcupdate.h>
18#include <linux/mount.h> 19#include <linux/mount.h>
@@ -127,6 +128,7 @@ struct file *get_empty_filp(void)
127 atomic_long_set(&f->f_count, 1); 128 atomic_long_set(&f->f_count, 1);
128 rwlock_init(&f->f_owner.lock); 129 rwlock_init(&f->f_owner.lock);
129 f->f_cred = get_cred(cred); 130 f->f_cred = get_cred(cred);
131 spin_lock_init(&f->f_lock);
130 eventpoll_init_file(f); 132 eventpoll_init_file(f);
131 /* f->f_version: 0 */ 133 /* f->f_version: 0 */
132 return f; 134 return f;
@@ -279,6 +281,7 @@ void __fput(struct file *file)
279 if (file->f_op && file->f_op->release) 281 if (file->f_op && file->f_op->release)
280 file->f_op->release(inode, file); 282 file->f_op->release(inode, file);
281 security_file_free(file); 283 security_file_free(file);
284 ima_file_free(file);
282 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 285 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
283 cdev_put(inode->i_cdev); 286 cdev_put(inode->i_cdev);
284 fops_put(file->f_op); 287 fops_put(file->f_op);
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index e563a6449811..3a981b7f64ca 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,10 @@
1config GFS2_FS 1config GFS2_FS
2 tristate "GFS2 file system support" 2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL && (64BIT || LBD) 3 depends on EXPERIMENTAL && (64BIT || LBD)
4 select DLM if GFS2_FS_LOCKING_DLM
5 select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
6 select SYSFS if GFS2_FS_LOCKING_DLM
7 select IP_SCTP if DLM_SCTP
4 select FS_POSIX_ACL 8 select FS_POSIX_ACL
5 select CRC32 9 select CRC32
6 help 10 help
@@ -18,17 +22,16 @@ config GFS2_FS
18 the locking module below. Documentation and utilities for GFS2 can 22 the locking module below. Documentation and utilities for GFS2 can
19 be found here: http://sources.redhat.com/cluster 23 be found here: http://sources.redhat.com/cluster
20 24
21 The "nolock" lock module is now built in to GFS2 by default. 25 The "nolock" lock module is now built in to GFS2 by default. If
26 you want to use the DLM, be sure to enable HOTPLUG and IPv4/6
27 networking.
22 28
23config GFS2_FS_LOCKING_DLM 29config GFS2_FS_LOCKING_DLM
24 tristate "GFS2 DLM locking module" 30 bool "GFS2 DLM locking"
25 depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n) 31 depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
26 select IP_SCTP if DLM_SCTP
27 select CONFIGFS_FS
28 select DLM
29 help 32 help
30 Multiple node locking module for GFS2 33 Multiple node locking module for GFS2
31 34
32 Most users of GFS2 will require this module. It provides the locking 35 Most users of GFS2 will require this. It provides the locking
33 interface between GFS2 and the DLM, which is required to use GFS2 36 interface between GFS2 and the DLM, which is required to use GFS2
34 in a cluster environment. 37 in a cluster environment.
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index c1b4ec6a9650..a851ea4bdf70 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o 1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ 2gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o log.o lops.o locking.o main.o meta_io.o \ 3 glops.o inode.o log.o lops.o main.o meta_io.o \
4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ 4 mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o quota.o \ 5 ops_fstype.o ops_inode.o ops_super.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o util.o 6 recovery.o rgrp.o super.o sys.o trans.o util.o
7 7
8obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ 8gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
9 9
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index e335dceb6a4f..43764f4fa763 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -15,7 +15,6 @@
15#include <linux/posix_acl.h> 15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h> 16#include <linux/posix_acl_xattr.h>
17#include <linux/gfs2_ondisk.h> 17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
21#include "incore.h" 20#include "incore.h"
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 11ffc56f1f81..3a5d3f883e10 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index b7c8e5c70791..aef4d0c06748 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -60,7 +60,6 @@
60#include <linux/gfs2_ondisk.h> 60#include <linux/gfs2_ondisk.h>
61#include <linux/crc32.h> 61#include <linux/crc32.h>
62#include <linux/vmalloc.h> 62#include <linux/vmalloc.h>
63#include <linux/lm_interface.h>
64 63
65#include "gfs2.h" 64#include "gfs2.h"
66#include "incore.h" 65#include "incore.h"
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index f114ba2b3557..dee9b03e5b37 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -14,7 +14,6 @@
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/xattr.h> 15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/uaccess.h> 17#include <asm/uaccess.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 0d1c76d906ae..899763aed217 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/xattr.h> 14#include <linux/xattr.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17#include <asm/uaccess.h> 16#include <asm/uaccess.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6b983aef785d..3984e47d1d33 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -10,7 +10,6 @@
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
15#include <linux/delay.h> 14#include <linux/delay.h>
16#include <linux/sort.h> 15#include <linux/sort.h>
@@ -18,7 +17,6 @@
18#include <linux/kallsyms.h> 17#include <linux/kallsyms.h>
19#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
20#include <linux/list.h> 19#include <linux/list.h>
21#include <linux/lm_interface.h>
22#include <linux/wait.h> 20#include <linux/wait.h>
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/rwsem.h> 22#include <linux/rwsem.h>
@@ -155,13 +153,10 @@ static void glock_free(struct gfs2_glock *gl)
155 struct gfs2_sbd *sdp = gl->gl_sbd; 153 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct inode *aspace = gl->gl_aspace; 154 struct inode *aspace = gl->gl_aspace;
157 155
158 if (sdp->sd_lockstruct.ls_ops->lm_put_lock)
159 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
160
161 if (aspace) 156 if (aspace)
162 gfs2_aspace_put(aspace); 157 gfs2_aspace_put(aspace);
163 158
164 kmem_cache_free(gfs2_glock_cachep, gl); 159 sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
165} 160}
166 161
167/** 162/**
@@ -172,6 +167,7 @@ static void glock_free(struct gfs2_glock *gl)
172 167
173static void gfs2_glock_hold(struct gfs2_glock *gl) 168static void gfs2_glock_hold(struct gfs2_glock *gl)
174{ 169{
170 GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
175 atomic_inc(&gl->gl_ref); 171 atomic_inc(&gl->gl_ref);
176} 172}
177 173
@@ -211,17 +207,15 @@ int gfs2_glock_put(struct gfs2_glock *gl)
211 atomic_dec(&lru_count); 207 atomic_dec(&lru_count);
212 } 208 }
213 spin_unlock(&lru_lock); 209 spin_unlock(&lru_lock);
214 GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
215 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_lru));
216 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 210 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
217 glock_free(gl); 211 glock_free(gl);
218 rv = 1; 212 rv = 1;
219 goto out; 213 goto out;
220 } 214 }
221 write_unlock(gl_lock_addr(gl->gl_hash));
222 /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */ 215 /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */
223 if (atomic_read(&gl->gl_ref) == 2) 216 if (atomic_read(&gl->gl_ref) == 2)
224 gfs2_glock_schedule_for_reclaim(gl); 217 gfs2_glock_schedule_for_reclaim(gl);
218 write_unlock(gl_lock_addr(gl->gl_hash));
225out: 219out:
226 return rv; 220 return rv;
227} 221}
@@ -256,27 +250,6 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
256} 250}
257 251
258/** 252/**
259 * gfs2_glock_find() - Find glock by lock number
260 * @sdp: The GFS2 superblock
261 * @name: The lock name
262 *
263 * Returns: NULL, or the struct gfs2_glock with the requested number
264 */
265
266static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
267 const struct lm_lockname *name)
268{
269 unsigned int hash = gl_hash(sdp, name);
270 struct gfs2_glock *gl;
271
272 read_lock(gl_lock_addr(hash));
273 gl = search_bucket(hash, sdp, name);
274 read_unlock(gl_lock_addr(hash));
275
276 return gl;
277}
278
279/**
280 * may_grant - check if its ok to grant a new lock 253 * may_grant - check if its ok to grant a new lock
281 * @gl: The glock 254 * @gl: The glock
282 * @gh: The lock request which we wish to grant 255 * @gh: The lock request which we wish to grant
@@ -523,7 +496,7 @@ out_locked:
523} 496}
524 497
525static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, 498static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
526 unsigned int cur_state, unsigned int req_state, 499 unsigned int req_state,
527 unsigned int flags) 500 unsigned int flags)
528{ 501{
529 int ret = LM_OUT_ERROR; 502 int ret = LM_OUT_ERROR;
@@ -532,7 +505,7 @@ static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
532 return req_state == LM_ST_UNLOCKED ? 0 : req_state; 505 return req_state == LM_ST_UNLOCKED ? 0 : req_state;
533 506
534 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 507 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
535 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, 508 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
536 req_state, flags); 509 req_state, flags);
537 return ret; 510 return ret;
538} 511}
@@ -575,7 +548,7 @@ __acquires(&gl->gl_spin)
575 gl->gl_state == LM_ST_DEFERRED) && 548 gl->gl_state == LM_ST_DEFERRED) &&
576 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 549 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
577 lck_flags |= LM_FLAG_TRY_1CB; 550 lck_flags |= LM_FLAG_TRY_1CB;
578 ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags); 551 ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
579 552
580 if (!(ret & LM_OUT_ASYNC)) { 553 if (!(ret & LM_OUT_ASYNC)) {
581 finish_xmote(gl, ret); 554 finish_xmote(gl, ret);
@@ -624,10 +597,11 @@ __acquires(&gl->gl_spin)
624 597
625 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); 598 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
626 599
600 down_read(&gfs2_umount_flush_sem);
627 if (test_bit(GLF_DEMOTE, &gl->gl_flags) && 601 if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
628 gl->gl_demote_state != gl->gl_state) { 602 gl->gl_demote_state != gl->gl_state) {
629 if (find_first_holder(gl)) 603 if (find_first_holder(gl))
630 goto out; 604 goto out_unlock;
631 if (nonblock) 605 if (nonblock)
632 goto out_sched; 606 goto out_sched;
633 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); 607 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
@@ -638,23 +612,26 @@ __acquires(&gl->gl_spin)
638 gfs2_demote_wake(gl); 612 gfs2_demote_wake(gl);
639 ret = do_promote(gl); 613 ret = do_promote(gl);
640 if (ret == 0) 614 if (ret == 0)
641 goto out; 615 goto out_unlock;
642 if (ret == 2) 616 if (ret == 2)
643 return; 617 goto out_sem;
644 gh = find_first_waiter(gl); 618 gh = find_first_waiter(gl);
645 gl->gl_target = gh->gh_state; 619 gl->gl_target = gh->gh_state;
646 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 620 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
647 do_error(gl, 0); /* Fail queued try locks */ 621 do_error(gl, 0); /* Fail queued try locks */
648 } 622 }
649 do_xmote(gl, gh, gl->gl_target); 623 do_xmote(gl, gh, gl->gl_target);
624out_sem:
625 up_read(&gfs2_umount_flush_sem);
650 return; 626 return;
651 627
652out_sched: 628out_sched:
653 gfs2_glock_hold(gl); 629 gfs2_glock_hold(gl);
654 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 630 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
655 gfs2_glock_put(gl); 631 gfs2_glock_put(gl);
656out: 632out_unlock:
657 clear_bit(GLF_LOCK, &gl->gl_flags); 633 clear_bit(GLF_LOCK, &gl->gl_flags);
634 goto out_sem;
658} 635}
659 636
660static void glock_work_func(struct work_struct *work) 637static void glock_work_func(struct work_struct *work)
@@ -681,18 +658,6 @@ static void glock_work_func(struct work_struct *work)
681 gfs2_glock_put(gl); 658 gfs2_glock_put(gl);
682} 659}
683 660
684static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
685 void **lockp)
686{
687 int error = -EIO;
688 if (!sdp->sd_lockstruct.ls_ops->lm_get_lock)
689 return 0;
690 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
691 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
692 sdp->sd_lockstruct.ls_lockspace, name, lockp);
693 return error;
694}
695
696/** 661/**
697 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist 662 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
698 * @sdp: The GFS2 superblock 663 * @sdp: The GFS2 superblock
@@ -719,10 +684,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
719 gl = search_bucket(hash, sdp, &name); 684 gl = search_bucket(hash, sdp, &name);
720 read_unlock(gl_lock_addr(hash)); 685 read_unlock(gl_lock_addr(hash));
721 686
722 if (gl || !create) { 687 *glp = gl;
723 *glp = gl; 688 if (gl)
724 return 0; 689 return 0;
725 } 690 if (!create)
691 return -ENOENT;
726 692
727 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); 693 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
728 if (!gl) 694 if (!gl)
@@ -736,7 +702,9 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
736 gl->gl_demote_state = LM_ST_EXCLUSIVE; 702 gl->gl_demote_state = LM_ST_EXCLUSIVE;
737 gl->gl_hash = hash; 703 gl->gl_hash = hash;
738 gl->gl_ops = glops; 704 gl->gl_ops = glops;
739 gl->gl_stamp = jiffies; 705 snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number);
706 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
707 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
740 gl->gl_tchange = jiffies; 708 gl->gl_tchange = jiffies;
741 gl->gl_object = NULL; 709 gl->gl_object = NULL;
742 gl->gl_sbd = sdp; 710 gl->gl_sbd = sdp;
@@ -753,10 +721,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
753 } 721 }
754 } 722 }
755 723
756 error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
757 if (error)
758 goto fail_aspace;
759
760 write_lock(gl_lock_addr(hash)); 724 write_lock(gl_lock_addr(hash));
761 tmp = search_bucket(hash, sdp, &name); 725 tmp = search_bucket(hash, sdp, &name);
762 if (tmp) { 726 if (tmp) {
@@ -772,9 +736,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
772 736
773 return 0; 737 return 0;
774 738
775fail_aspace:
776 if (gl->gl_aspace)
777 gfs2_aspace_put(gl->gl_aspace);
778fail: 739fail:
779 kmem_cache_free(gfs2_glock_cachep, gl); 740 kmem_cache_free(gfs2_glock_cachep, gl);
780 return error; 741 return error;
@@ -966,7 +927,7 @@ do_cancel:
966 if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { 927 if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
967 spin_unlock(&gl->gl_spin); 928 spin_unlock(&gl->gl_spin);
968 if (sdp->sd_lockstruct.ls_ops->lm_cancel) 929 if (sdp->sd_lockstruct.ls_ops->lm_cancel)
969 sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); 930 sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
970 spin_lock(&gl->gl_spin); 931 spin_lock(&gl->gl_spin);
971 } 932 }
972 return; 933 return;
@@ -1051,7 +1012,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1051 spin_lock(&gl->gl_spin); 1012 spin_lock(&gl->gl_spin);
1052 clear_bit(GLF_LOCK, &gl->gl_flags); 1013 clear_bit(GLF_LOCK, &gl->gl_flags);
1053 } 1014 }
1054 gl->gl_stamp = jiffies;
1055 if (list_empty(&gl->gl_holders) && 1015 if (list_empty(&gl->gl_holders) &&
1056 !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1016 !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1057 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1017 !test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1240,70 +1200,13 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1240 gfs2_glock_dq_uninit(&ghs[x]); 1200 gfs2_glock_dq_uninit(&ghs[x]);
1241} 1201}
1242 1202
1243static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) 1203void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1244{
1245 int error = -EIO;
1246 if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb)
1247 return 0;
1248 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1249 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
1250 return error;
1251}
1252
1253/**
1254 * gfs2_lvb_hold - attach a LVB from a glock
1255 * @gl: The glock in question
1256 *
1257 */
1258
1259int gfs2_lvb_hold(struct gfs2_glock *gl)
1260{
1261 int error;
1262
1263 if (!atomic_read(&gl->gl_lvb_count)) {
1264 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1265 if (error)
1266 return error;
1267 gfs2_glock_hold(gl);
1268 }
1269 atomic_inc(&gl->gl_lvb_count);
1270
1271 return 0;
1272}
1273
1274/**
1275 * gfs2_lvb_unhold - detach a LVB from a glock
1276 * @gl: The glock in question
1277 *
1278 */
1279
1280void gfs2_lvb_unhold(struct gfs2_glock *gl)
1281{
1282 struct gfs2_sbd *sdp = gl->gl_sbd;
1283
1284 gfs2_glock_hold(gl);
1285 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1286 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1287 if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb)
1288 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb);
1289 gl->gl_lvb = NULL;
1290 gfs2_glock_put(gl);
1291 }
1292 gfs2_glock_put(gl);
1293}
1294
1295static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1296 unsigned int state)
1297{ 1204{
1298 struct gfs2_glock *gl;
1299 unsigned long delay = 0; 1205 unsigned long delay = 0;
1300 unsigned long holdtime; 1206 unsigned long holdtime;
1301 unsigned long now = jiffies; 1207 unsigned long now = jiffies;
1302 1208
1303 gl = gfs2_glock_find(sdp, name); 1209 gfs2_glock_hold(gl);
1304 if (!gl)
1305 return;
1306
1307 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 1210 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
1308 if (time_before(now, holdtime)) 1211 if (time_before(now, holdtime))
1309 delay = holdtime - now; 1212 delay = holdtime - now;
@@ -1317,74 +1220,33 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1317 gfs2_glock_put(gl); 1220 gfs2_glock_put(gl);
1318} 1221}
1319 1222
1320static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
1321{
1322 struct gfs2_jdesc *jd;
1323
1324 spin_lock(&sdp->sd_jindex_spin);
1325 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
1326 if (jd->jd_jid != jid)
1327 continue;
1328 jd->jd_dirty = 1;
1329 break;
1330 }
1331 spin_unlock(&sdp->sd_jindex_spin);
1332}
1333
1334/** 1223/**
1335 * gfs2_glock_cb - Callback used by locking module 1224 * gfs2_glock_complete - Callback used by locking
1336 * @sdp: Pointer to the superblock 1225 * @gl: Pointer to the glock
1337 * @type: Type of callback 1226 * @ret: The return value from the dlm
1338 * @data: Type dependent data pointer
1339 * 1227 *
1340 * Called by the locking module when it wants to tell us something.
1341 * Either we need to drop a lock, one of our ASYNC requests completed, or
1342 * a journal from another client needs to be recovered.
1343 */ 1228 */
1344 1229
1345void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) 1230void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1346{ 1231{
1347 struct gfs2_sbd *sdp = cb_data; 1232 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
1348 1233 gl->gl_reply = ret;
1349 switch (type) { 1234 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
1350 case LM_CB_NEED_E: 1235 struct gfs2_holder *gh;
1351 blocking_cb(sdp, data, LM_ST_UNLOCKED); 1236 spin_lock(&gl->gl_spin);
1352 return; 1237 gh = find_first_waiter(gl);
1353 1238 if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
1354 case LM_CB_NEED_D: 1239 (gl->gl_target != LM_ST_UNLOCKED)) ||
1355 blocking_cb(sdp, data, LM_ST_DEFERRED); 1240 ((ret & ~LM_OUT_ST_MASK) != 0))
1356 return; 1241 set_bit(GLF_FROZEN, &gl->gl_flags);
1357 1242 spin_unlock(&gl->gl_spin);
1358 case LM_CB_NEED_S: 1243 if (test_bit(GLF_FROZEN, &gl->gl_flags))
1359 blocking_cb(sdp, data, LM_ST_SHARED);
1360 return;
1361
1362 case LM_CB_ASYNC: {
1363 struct lm_async_cb *async = data;
1364 struct gfs2_glock *gl;
1365
1366 down_read(&gfs2_umount_flush_sem);
1367 gl = gfs2_glock_find(sdp, &async->lc_name);
1368 if (gfs2_assert_warn(sdp, gl))
1369 return; 1244 return;
1370 gl->gl_reply = async->lc_ret;
1371 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1372 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1373 gfs2_glock_put(gl);
1374 up_read(&gfs2_umount_flush_sem);
1375 return;
1376 }
1377
1378 case LM_CB_NEED_RECOVERY:
1379 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1380 if (sdp->sd_recoverd_process)
1381 wake_up_process(sdp->sd_recoverd_process);
1382 return;
1383
1384 default:
1385 gfs2_assert_warn(sdp, 0);
1386 return;
1387 } 1245 }
1246 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1247 gfs2_glock_hold(gl);
1248 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1249 gfs2_glock_put(gl);
1388} 1250}
1389 1251
1390/** 1252/**
@@ -1515,6 +1377,25 @@ out:
1515 return has_entries; 1377 return has_entries;
1516} 1378}
1517 1379
1380
1381/**
1382 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
1383 * @gl: The glock to thaw
1384 *
1385 * N.B. When we freeze a glock, we leave a ref to the glock outstanding,
1386 * so this has to result in the ref count being dropped by one.
1387 */
1388
1389static void thaw_glock(struct gfs2_glock *gl)
1390{
1391 if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
1392 return;
1393 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1394 gfs2_glock_hold(gl);
1395 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1396 gfs2_glock_put(gl);
1397}
1398
1518/** 1399/**
1519 * clear_glock - look at a glock and see if we can free it from glock cache 1400 * clear_glock - look at a glock and see if we can free it from glock cache
1520 * @gl: the glock to look at 1401 * @gl: the glock to look at
@@ -1540,6 +1421,20 @@ static void clear_glock(struct gfs2_glock *gl)
1540} 1421}
1541 1422
1542/** 1423/**
1424 * gfs2_glock_thaw - Thaw any frozen glocks
1425 * @sdp: The super block
1426 *
1427 */
1428
1429void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1430{
1431 unsigned x;
1432
1433 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1434 examine_bucket(thaw_glock, sdp, x);
1435}
1436
1437/**
1543 * gfs2_gl_hash_clear - Empty out the glock hash table 1438 * gfs2_gl_hash_clear - Empty out the glock hash table
1544 * @sdp: the filesystem 1439 * @sdp: the filesystem
1545 * @wait: wait until it's all gone 1440 * @wait: wait until it's all gone
@@ -1619,7 +1514,7 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
1619 if (flags & LM_FLAG_NOEXP) 1514 if (flags & LM_FLAG_NOEXP)
1620 *p++ = 'e'; 1515 *p++ = 'e';
1621 if (flags & LM_FLAG_ANY) 1516 if (flags & LM_FLAG_ANY)
1622 *p++ = 'a'; 1517 *p++ = 'A';
1623 if (flags & LM_FLAG_PRIORITY) 1518 if (flags & LM_FLAG_PRIORITY)
1624 *p++ = 'p'; 1519 *p++ = 'p';
1625 if (flags & GL_ASYNC) 1520 if (flags & GL_ASYNC)
@@ -1683,6 +1578,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
1683 *p++ = 'i'; 1578 *p++ = 'i';
1684 if (test_bit(GLF_REPLY_PENDING, gflags)) 1579 if (test_bit(GLF_REPLY_PENDING, gflags))
1685 *p++ = 'r'; 1580 *p++ = 'r';
1581 if (test_bit(GLF_INITIAL, gflags))
1582 *p++ = 'I';
1583 if (test_bit(GLF_FROZEN, gflags))
1584 *p++ = 'F';
1686 *p = 0; 1585 *p = 0;
1687 return buf; 1586 return buf;
1688} 1587}
@@ -1717,14 +1616,13 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1717 dtime *= 1000000/HZ; /* demote time in uSec */ 1616 dtime *= 1000000/HZ; /* demote time in uSec */
1718 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1617 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1719 dtime = 0; 1618 dtime = 0;
1720 gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n", 1619 gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu a:%d r:%d\n",
1721 state2str(gl->gl_state), 1620 state2str(gl->gl_state),
1722 gl->gl_name.ln_type, 1621 gl->gl_name.ln_type,
1723 (unsigned long long)gl->gl_name.ln_number, 1622 (unsigned long long)gl->gl_name.ln_number,
1724 gflags2str(gflags_buf, &gl->gl_flags), 1623 gflags2str(gflags_buf, &gl->gl_flags),
1725 state2str(gl->gl_target), 1624 state2str(gl->gl_target),
1726 state2str(gl->gl_demote_state), dtime, 1625 state2str(gl->gl_demote_state), dtime,
1727 atomic_read(&gl->gl_lvb_count),
1728 atomic_read(&gl->gl_ail_count), 1626 atomic_read(&gl->gl_ail_count),
1729 atomic_read(&gl->gl_ref)); 1627 atomic_read(&gl->gl_ref));
1730 1628
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 543ec7ecfbda..a602a28f6f08 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -11,15 +11,130 @@
11#define __GLOCK_DOT_H__ 11#define __GLOCK_DOT_H__
12 12
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/parser.h>
14#include "incore.h" 15#include "incore.h"
15 16
16/* Flags for lock requests; used in gfs2_holder gh_flag field. 17/* Options for hostdata parser */
17 From lm_interface.h: 18
19enum {
20 Opt_jid,
21 Opt_id,
22 Opt_first,
23 Opt_nodir,
24 Opt_err,
25};
26
27/*
28 * lm_lockname types
29 */
30
31#define LM_TYPE_RESERVED 0x00
32#define LM_TYPE_NONDISK 0x01
33#define LM_TYPE_INODE 0x02
34#define LM_TYPE_RGRP 0x03
35#define LM_TYPE_META 0x04
36#define LM_TYPE_IOPEN 0x05
37#define LM_TYPE_FLOCK 0x06
38#define LM_TYPE_PLOCK 0x07
39#define LM_TYPE_QUOTA 0x08
40#define LM_TYPE_JOURNAL 0x09
41
42/*
43 * lm_lock() states
44 *
45 * SHARED is compatible with SHARED, not with DEFERRED or EX.
46 * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
47 */
48
49#define LM_ST_UNLOCKED 0
50#define LM_ST_EXCLUSIVE 1
51#define LM_ST_DEFERRED 2
52#define LM_ST_SHARED 3
53
54/*
55 * lm_lock() flags
56 *
57 * LM_FLAG_TRY
58 * Don't wait to acquire the lock if it can't be granted immediately.
59 *
60 * LM_FLAG_TRY_1CB
61 * Send one blocking callback if TRY is set and the lock is not granted.
62 *
63 * LM_FLAG_NOEXP
64 * GFS sets this flag on lock requests it makes while doing journal recovery.
65 * These special requests should not be blocked due to the recovery like
66 * ordinary locks would be.
67 *
68 * LM_FLAG_ANY
69 * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
70 * also be granted in SHARED. The preferred state is whichever is compatible
71 * with other granted locks, or the specified state if no other locks exist.
72 *
73 * LM_FLAG_PRIORITY
74 * Override fairness considerations. Suppose a lock is held in a shared state
75 * and there is a pending request for the deferred state. A shared lock
76 * request with the priority flag would be allowed to bypass the deferred
77 * request and directly join the other shared lock. A shared lock request
78 * without the priority flag might be forced to wait until the deferred
79 * requested had acquired and released the lock.
80 */
81
18#define LM_FLAG_TRY 0x00000001 82#define LM_FLAG_TRY 0x00000001
19#define LM_FLAG_TRY_1CB 0x00000002 83#define LM_FLAG_TRY_1CB 0x00000002
20#define LM_FLAG_NOEXP 0x00000004 84#define LM_FLAG_NOEXP 0x00000004
21#define LM_FLAG_ANY 0x00000008 85#define LM_FLAG_ANY 0x00000008
22#define LM_FLAG_PRIORITY 0x00000010 */ 86#define LM_FLAG_PRIORITY 0x00000010
87#define GL_ASYNC 0x00000040
88#define GL_EXACT 0x00000080
89#define GL_SKIP 0x00000100
90#define GL_ATIME 0x00000200
91#define GL_NOCACHE 0x00000400
92
93/*
94 * lm_lock() and lm_async_cb return flags
95 *
96 * LM_OUT_ST_MASK
97 * Masks the lower two bits of lock state in the returned value.
98 *
99 * LM_OUT_CANCELED
100 * The lock request was canceled.
101 *
102 * LM_OUT_ASYNC
103 * The result of the request will be returned in an LM_CB_ASYNC callback.
104 *
105 */
106
107#define LM_OUT_ST_MASK 0x00000003
108#define LM_OUT_CANCELED 0x00000008
109#define LM_OUT_ASYNC 0x00000080
110#define LM_OUT_ERROR 0x00000100
111
112/*
113 * lm_recovery_done() messages
114 */
115
116#define LM_RD_GAVEUP 308
117#define LM_RD_SUCCESS 309
118
119#define GLR_TRYFAILED 13
120
121struct lm_lockops {
122 const char *lm_proto_name;
123 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
124 void (*lm_unmount) (struct gfs2_sbd *sdp);
125 void (*lm_withdraw) (struct gfs2_sbd *sdp);
126 void (*lm_put_lock) (struct kmem_cache *cachep, void *gl);
127 unsigned int (*lm_lock) (struct gfs2_glock *gl,
128 unsigned int req_state, unsigned int flags);
129 void (*lm_cancel) (struct gfs2_glock *gl);
130 const match_table_t *lm_tokens;
131};
132
133#define LM_FLAG_TRY 0x00000001
134#define LM_FLAG_TRY_1CB 0x00000002
135#define LM_FLAG_NOEXP 0x00000004
136#define LM_FLAG_ANY 0x00000008
137#define LM_FLAG_PRIORITY 0x00000010
23 138
24#define GL_ASYNC 0x00000040 139#define GL_ASYNC 0x00000040
25#define GL_EXACT 0x00000080 140#define GL_EXACT 0x00000080
@@ -128,10 +243,12 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
128int gfs2_lvb_hold(struct gfs2_glock *gl); 243int gfs2_lvb_hold(struct gfs2_glock *gl);
129void gfs2_lvb_unhold(struct gfs2_glock *gl); 244void gfs2_lvb_unhold(struct gfs2_glock *gl);
130 245
131void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); 246void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
247void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
132void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 248void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
133void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 249void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
134void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 250void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
251void gfs2_glock_thaw(struct gfs2_sbd *sdp);
135 252
136int __init gfs2_glock_init(void); 253int __init gfs2_glock_init(void);
137void gfs2_glock_exit(void); 254void gfs2_glock_exit(void);
@@ -141,4 +258,6 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
141int gfs2_register_debugfs(void); 258int gfs2_register_debugfs(void);
142void gfs2_unregister_debugfs(void); 259void gfs2_unregister_debugfs(void);
143 260
261extern const struct lm_lockops gfs2_dlm_ops;
262
144#endif /* __GLOCK_DOT_H__ */ 263#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8522d3aa64fc..bf23a62aa925 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -12,7 +12,6 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/lm_interface.h>
16#include <linux/bio.h> 15#include <linux/bio.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
@@ -38,20 +37,25 @@
38static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 37static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
39{ 38{
40 struct gfs2_sbd *sdp = gl->gl_sbd; 39 struct gfs2_sbd *sdp = gl->gl_sbd;
41 unsigned int blocks;
42 struct list_head *head = &gl->gl_ail_list; 40 struct list_head *head = &gl->gl_ail_list;
43 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
44 struct buffer_head *bh; 42 struct buffer_head *bh;
45 int error; 43 struct gfs2_trans tr;
46 44
47 blocks = atomic_read(&gl->gl_ail_count); 45 memset(&tr, 0, sizeof(tr));
48 if (!blocks) 46 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
49 return;
50 47
51 error = gfs2_trans_begin(sdp, 0, blocks); 48 if (!tr.tr_revokes)
52 if (gfs2_assert_withdraw(sdp, !error))
53 return; 49 return;
54 50
51 /* A shortened, inline version of gfs2_trans_begin() */
52 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
53 tr.tr_ip = (unsigned long)__builtin_return_address(0);
54 INIT_LIST_HEAD(&tr.tr_list_buf);
55 gfs2_log_reserve(sdp, tr.tr_reserved);
56 BUG_ON(current->journal_info);
57 current->journal_info = &tr;
58
55 gfs2_log_lock(sdp); 59 gfs2_log_lock(sdp);
56 while (!list_empty(head)) { 60 while (!list_empty(head)) {
57 bd = list_entry(head->next, struct gfs2_bufdata, 61 bd = list_entry(head->next, struct gfs2_bufdata,
@@ -72,29 +76,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
72} 76}
73 77
74/** 78/**
75 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock 79 * rgrp_go_sync - sync out the metadata for this glock
76 * @gl: the glock
77 *
78 */
79
80static void gfs2_pte_inval(struct gfs2_glock *gl)
81{
82 struct gfs2_inode *ip;
83 struct inode *inode;
84
85 ip = gl->gl_object;
86 inode = &ip->i_inode;
87 if (!ip || !S_ISREG(inode->i_mode))
88 return;
89
90 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
91 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
92 set_bit(GLF_DIRTY, &gl->gl_flags);
93
94}
95
96/**
97 * meta_go_sync - sync out the metadata for this glock
98 * @gl: the glock 80 * @gl: the glock
99 * 81 *
100 * Called when demoting or unlocking an EX glock. We must flush 82 * Called when demoting or unlocking an EX glock. We must flush
@@ -102,36 +84,42 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
102 * not return to caller to demote/unlock the glock until I/O is complete. 84 * not return to caller to demote/unlock the glock until I/O is complete.
103 */ 85 */
104 86
105static void meta_go_sync(struct gfs2_glock *gl) 87static void rgrp_go_sync(struct gfs2_glock *gl)
106{ 88{
107 if (gl->gl_state != LM_ST_EXCLUSIVE) 89 struct address_space *metamapping = gl->gl_aspace->i_mapping;
90 int error;
91
92 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
108 return; 93 return;
94 BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
109 95
110 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { 96 gfs2_log_flush(gl->gl_sbd, gl);
111 gfs2_log_flush(gl->gl_sbd, gl); 97 filemap_fdatawrite(metamapping);
112 gfs2_meta_sync(gl); 98 error = filemap_fdatawait(metamapping);
113 gfs2_ail_empty_gl(gl); 99 mapping_set_error(metamapping, error);
114 } 100 gfs2_ail_empty_gl(gl);
115} 101}
116 102
117/** 103/**
118 * meta_go_inval - invalidate the metadata for this glock 104 * rgrp_go_inval - invalidate the metadata for this glock
119 * @gl: the glock 105 * @gl: the glock
120 * @flags: 106 * @flags:
121 * 107 *
108 * We never used LM_ST_DEFERRED with resource groups, so that we
109 * should always see the metadata flag set here.
110 *
122 */ 111 */
123 112
124static void meta_go_inval(struct gfs2_glock *gl, int flags) 113static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
125{ 114{
126 if (!(flags & DIO_METADATA)) 115 struct address_space *mapping = gl->gl_aspace->i_mapping;
127 return;
128 116
129 gfs2_meta_inval(gl); 117 BUG_ON(!(flags & DIO_METADATA));
130 if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex)) 118 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
131 gl->gl_sbd->sd_rindex_uptodate = 0; 119 truncate_inode_pages(mapping, 0);
132 else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) {
133 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
134 120
121 if (gl->gl_object) {
122 struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
135 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 123 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
136 } 124 }
137} 125}
@@ -148,48 +136,54 @@ static void inode_go_sync(struct gfs2_glock *gl)
148 struct address_space *metamapping = gl->gl_aspace->i_mapping; 136 struct address_space *metamapping = gl->gl_aspace->i_mapping;
149 int error; 137 int error;
150 138
151 if (gl->gl_state != LM_ST_UNLOCKED)
152 gfs2_pte_inval(gl);
153 if (gl->gl_state != LM_ST_EXCLUSIVE)
154 return;
155
156 if (ip && !S_ISREG(ip->i_inode.i_mode)) 139 if (ip && !S_ISREG(ip->i_inode.i_mode))
157 ip = NULL; 140 ip = NULL;
141 if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
142 unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
143 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
144 return;
158 145
159 if (test_bit(GLF_DIRTY, &gl->gl_flags)) { 146 BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
160 gfs2_log_flush(gl->gl_sbd, gl); 147
161 filemap_fdatawrite(metamapping); 148 gfs2_log_flush(gl->gl_sbd, gl);
162 if (ip) { 149 filemap_fdatawrite(metamapping);
163 struct address_space *mapping = ip->i_inode.i_mapping; 150 if (ip) {
164 filemap_fdatawrite(mapping); 151 struct address_space *mapping = ip->i_inode.i_mapping;
165 error = filemap_fdatawait(mapping); 152 filemap_fdatawrite(mapping);
166 mapping_set_error(mapping, error); 153 error = filemap_fdatawait(mapping);
167 } 154 mapping_set_error(mapping, error);
168 error = filemap_fdatawait(metamapping);
169 mapping_set_error(metamapping, error);
170 clear_bit(GLF_DIRTY, &gl->gl_flags);
171 gfs2_ail_empty_gl(gl);
172 } 155 }
156 error = filemap_fdatawait(metamapping);
157 mapping_set_error(metamapping, error);
158 gfs2_ail_empty_gl(gl);
173} 159}
174 160
175/** 161/**
176 * inode_go_inval - prepare a inode glock to be released 162 * inode_go_inval - prepare a inode glock to be released
177 * @gl: the glock 163 * @gl: the glock
178 * @flags: 164 * @flags:
165 *
166 * Normally we invlidate everything, but if we are moving into
167 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
168 * can keep hold of the metadata, since it won't have changed.
179 * 169 *
180 */ 170 */
181 171
182static void inode_go_inval(struct gfs2_glock *gl, int flags) 172static void inode_go_inval(struct gfs2_glock *gl, int flags)
183{ 173{
184 struct gfs2_inode *ip = gl->gl_object; 174 struct gfs2_inode *ip = gl->gl_object;
185 int meta = (flags & DIO_METADATA);
186 175
187 if (meta) { 176 gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
188 gfs2_meta_inval(gl); 177
178 if (flags & DIO_METADATA) {
179 struct address_space *mapping = gl->gl_aspace->i_mapping;
180 truncate_inode_pages(mapping, 0);
189 if (ip) 181 if (ip)
190 set_bit(GIF_INVALID, &ip->i_flags); 182 set_bit(GIF_INVALID, &ip->i_flags);
191 } 183 }
192 184
185 if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
186 gl->gl_sbd->sd_rindex_uptodate = 0;
193 if (ip && S_ISREG(ip->i_inode.i_mode)) 187 if (ip && S_ISREG(ip->i_inode.i_mode))
194 truncate_inode_pages(ip->i_inode.i_mapping, 0); 188 truncate_inode_pages(ip->i_inode.i_mapping, 0);
195} 189}
@@ -390,20 +384,7 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
390 return 0; 384 return 0;
391} 385}
392 386
393/**
394 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
395 * @gl: the glock
396 *
397 * Returns: 1 if it's ok
398 */
399
400static int quota_go_demote_ok(const struct gfs2_glock *gl)
401{
402 return !atomic_read(&gl->gl_lvb_count);
403}
404
405const struct gfs2_glock_operations gfs2_meta_glops = { 387const struct gfs2_glock_operations gfs2_meta_glops = {
406 .go_xmote_th = meta_go_sync,
407 .go_type = LM_TYPE_META, 388 .go_type = LM_TYPE_META,
408}; 389};
409 390
@@ -418,8 +399,8 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
418}; 399};
419 400
420const struct gfs2_glock_operations gfs2_rgrp_glops = { 401const struct gfs2_glock_operations gfs2_rgrp_glops = {
421 .go_xmote_th = meta_go_sync, 402 .go_xmote_th = rgrp_go_sync,
422 .go_inval = meta_go_inval, 403 .go_inval = rgrp_go_inval,
423 .go_demote_ok = rgrp_go_demote_ok, 404 .go_demote_ok = rgrp_go_demote_ok,
424 .go_lock = rgrp_go_lock, 405 .go_lock = rgrp_go_lock,
425 .go_unlock = rgrp_go_unlock, 406 .go_unlock = rgrp_go_unlock,
@@ -448,7 +429,6 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {
448}; 429};
449 430
450const struct gfs2_glock_operations gfs2_quota_glops = { 431const struct gfs2_glock_operations gfs2_quota_glops = {
451 .go_demote_ok = quota_go_demote_ok,
452 .go_type = LM_TYPE_QUOTA, 432 .go_type = LM_TYPE_QUOTA,
453}; 433};
454 434
@@ -456,3 +436,15 @@ const struct gfs2_glock_operations gfs2_journal_glops = {
456 .go_type = LM_TYPE_JOURNAL, 436 .go_type = LM_TYPE_JOURNAL,
457}; 437};
458 438
439const struct gfs2_glock_operations *gfs2_glops_list[] = {
440 [LM_TYPE_META] = &gfs2_meta_glops,
441 [LM_TYPE_INODE] = &gfs2_inode_glops,
442 [LM_TYPE_RGRP] = &gfs2_rgrp_glops,
443 [LM_TYPE_NONDISK] = &gfs2_trans_glops,
444 [LM_TYPE_IOPEN] = &gfs2_iopen_glops,
445 [LM_TYPE_FLOCK] = &gfs2_flock_glops,
446 [LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
447 [LM_TYPE_QUOTA] = &gfs2_quota_glops,
448 [LM_TYPE_JOURNAL] = &gfs2_journal_glops,
449};
450
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index a1d9b5b024e6..b3aa2e3210fd 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -21,5 +21,6 @@ extern const struct gfs2_glock_operations gfs2_flock_glops;
21extern const struct gfs2_glock_operations gfs2_nondisk_glops; 21extern const struct gfs2_glock_operations gfs2_nondisk_glops;
22extern const struct gfs2_glock_operations gfs2_quota_glops; 22extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops; 23extern const struct gfs2_glock_operations gfs2_journal_glops;
24extern const struct gfs2_glock_operations *gfs2_glops_list[];
24 25
25#endif /* __GLOPS_DOT_H__ */ 26#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 608849d00021..399d1b978049 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,6 +12,8 @@
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/workqueue.h> 14#include <linux/workqueue.h>
15#include <linux/dlm.h>
16#include <linux/buffer_head.h>
15 17
16#define DIO_WAIT 0x00000010 18#define DIO_WAIT 0x00000010
17#define DIO_METADATA 0x00000020 19#define DIO_METADATA 0x00000020
@@ -26,6 +28,7 @@ struct gfs2_trans;
26struct gfs2_ail; 28struct gfs2_ail;
27struct gfs2_jdesc; 29struct gfs2_jdesc;
28struct gfs2_sbd; 30struct gfs2_sbd;
31struct lm_lockops;
29 32
30typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret); 33typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
31 34
@@ -121,6 +124,28 @@ struct gfs2_bufdata {
121 struct list_head bd_ail_gl_list; 124 struct list_head bd_ail_gl_list;
122}; 125};
123 126
127/*
128 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
129 * prefix of lock_dlm_ gets awkward.
130 */
131
132#define GDLM_STRNAME_BYTES 25
133#define GDLM_LVB_SIZE 32
134
135enum {
136 DFL_BLOCK_LOCKS = 0,
137};
138
139struct lm_lockname {
140 u64 ln_number;
141 unsigned int ln_type;
142};
143
144#define lm_name_equal(name1, name2) \
145 (((name1)->ln_number == (name2)->ln_number) && \
146 ((name1)->ln_type == (name2)->ln_type))
147
148
124struct gfs2_glock_operations { 149struct gfs2_glock_operations {
125 void (*go_xmote_th) (struct gfs2_glock *gl); 150 void (*go_xmote_th) (struct gfs2_glock *gl);
126 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); 151 int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
@@ -162,6 +187,8 @@ enum {
162 GLF_LFLUSH = 7, 187 GLF_LFLUSH = 7,
163 GLF_INVALIDATE_IN_PROGRESS = 8, 188 GLF_INVALIDATE_IN_PROGRESS = 8,
164 GLF_REPLY_PENDING = 9, 189 GLF_REPLY_PENDING = 9,
190 GLF_INITIAL = 10,
191 GLF_FROZEN = 11,
165}; 192};
166 193
167struct gfs2_glock { 194struct gfs2_glock {
@@ -176,16 +203,15 @@ struct gfs2_glock {
176 unsigned int gl_target; 203 unsigned int gl_target;
177 unsigned int gl_reply; 204 unsigned int gl_reply;
178 unsigned int gl_hash; 205 unsigned int gl_hash;
206 unsigned int gl_req;
179 unsigned int gl_demote_state; /* state requested by remote node */ 207 unsigned int gl_demote_state; /* state requested by remote node */
180 unsigned long gl_demote_time; /* time of first demote request */ 208 unsigned long gl_demote_time; /* time of first demote request */
181 struct list_head gl_holders; 209 struct list_head gl_holders;
182 210
183 const struct gfs2_glock_operations *gl_ops; 211 const struct gfs2_glock_operations *gl_ops;
184 void *gl_lock; 212 char gl_strname[GDLM_STRNAME_BYTES];
185 char *gl_lvb; 213 struct dlm_lksb gl_lksb;
186 atomic_t gl_lvb_count; 214 char gl_lvb[32];
187
188 unsigned long gl_stamp;
189 unsigned long gl_tchange; 215 unsigned long gl_tchange;
190 void *gl_object; 216 void *gl_object;
191 217
@@ -283,7 +309,9 @@ enum {
283 309
284struct gfs2_quota_data { 310struct gfs2_quota_data {
285 struct list_head qd_list; 311 struct list_head qd_list;
286 unsigned int qd_count; 312 struct list_head qd_reclaim;
313
314 atomic_t qd_count;
287 315
288 u32 qd_id; 316 u32 qd_id;
289 unsigned long qd_flags; /* QDF_... */ 317 unsigned long qd_flags; /* QDF_... */
@@ -303,7 +331,6 @@ struct gfs2_quota_data {
303 331
304 u64 qd_sync_gen; 332 u64 qd_sync_gen;
305 unsigned long qd_last_warn; 333 unsigned long qd_last_warn;
306 unsigned long qd_last_touched;
307}; 334};
308 335
309struct gfs2_trans { 336struct gfs2_trans {
@@ -390,7 +417,7 @@ struct gfs2_args {
390 unsigned int ar_suiddir:1; /* suiddir support */ 417 unsigned int ar_suiddir:1; /* suiddir support */
391 unsigned int ar_data:2; /* ordered/writeback */ 418 unsigned int ar_data:2; /* ordered/writeback */
392 unsigned int ar_meta:1; /* mount metafs */ 419 unsigned int ar_meta:1; /* mount metafs */
393 unsigned int ar_num_glockd; /* Number of glockd threads */ 420 unsigned int ar_discard:1; /* discard requests */
394}; 421};
395 422
396struct gfs2_tune { 423struct gfs2_tune {
@@ -406,7 +433,6 @@ struct gfs2_tune {
406 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ 433 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
407 unsigned int gt_quota_scale_num; /* Numerator */ 434 unsigned int gt_quota_scale_num; /* Numerator */
408 unsigned int gt_quota_scale_den; /* Denominator */ 435 unsigned int gt_quota_scale_den; /* Denominator */
409 unsigned int gt_quota_cache_secs;
410 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ 436 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
411 unsigned int gt_new_files_jdata; 437 unsigned int gt_new_files_jdata;
412 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 438 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
@@ -445,6 +471,31 @@ struct gfs2_sb_host {
445 471
446 char sb_lockproto[GFS2_LOCKNAME_LEN]; 472 char sb_lockproto[GFS2_LOCKNAME_LEN];
447 char sb_locktable[GFS2_LOCKNAME_LEN]; 473 char sb_locktable[GFS2_LOCKNAME_LEN];
474 u8 sb_uuid[16];
475};
476
477/*
478 * lm_mount() return values
479 *
480 * ls_jid - the journal ID this node should use
481 * ls_first - this node is the first to mount the file system
482 * ls_lockspace - lock module's context for this file system
483 * ls_ops - lock module's functions
484 */
485
486struct lm_lockstruct {
487 u32 ls_id;
488 unsigned int ls_jid;
489 unsigned int ls_first;
490 unsigned int ls_first_done;
491 unsigned int ls_nodir;
492 const struct lm_lockops *ls_ops;
493 unsigned long ls_flags;
494 dlm_lockspace_t *ls_dlm;
495
496 int ls_recover_jid;
497 int ls_recover_jid_done;
498 int ls_recover_jid_status;
448}; 499};
449 500
450struct gfs2_sbd { 501struct gfs2_sbd {
@@ -520,7 +571,6 @@ struct gfs2_sbd {
520 spinlock_t sd_jindex_spin; 571 spinlock_t sd_jindex_spin;
521 struct mutex sd_jindex_mutex; 572 struct mutex sd_jindex_mutex;
522 unsigned int sd_journals; 573 unsigned int sd_journals;
523 unsigned long sd_jindex_refresh_time;
524 574
525 struct gfs2_jdesc *sd_jdesc; 575 struct gfs2_jdesc *sd_jdesc;
526 struct gfs2_holder sd_journal_gh; 576 struct gfs2_holder sd_journal_gh;
@@ -540,7 +590,6 @@ struct gfs2_sbd {
540 590
541 struct list_head sd_quota_list; 591 struct list_head sd_quota_list;
542 atomic_t sd_quota_count; 592 atomic_t sd_quota_count;
543 spinlock_t sd_quota_spin;
544 struct mutex sd_quota_mutex; 593 struct mutex sd_quota_mutex;
545 wait_queue_head_t sd_quota_wait; 594 wait_queue_head_t sd_quota_wait;
546 struct list_head sd_trunc_list; 595 struct list_head sd_trunc_list;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3b87c188da41..7b277d449155 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -16,7 +16,6 @@
16#include <linux/sort.h> 16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h> 17#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h> 18#include <linux/crc32.h>
19#include <linux/lm_interface.h>
20#include <linux/security.h> 19#include <linux/security.h>
21#include <linux/time.h> 20#include <linux/time.h>
22 21
@@ -137,16 +136,16 @@ void gfs2_set_iop(struct inode *inode)
137 136
138 if (S_ISREG(mode)) { 137 if (S_ISREG(mode)) {
139 inode->i_op = &gfs2_file_iops; 138 inode->i_op = &gfs2_file_iops;
140 if (sdp->sd_args.ar_localflocks) 139 if (gfs2_localflocks(sdp))
141 inode->i_fop = &gfs2_file_fops_nolock; 140 inode->i_fop = gfs2_file_fops_nolock;
142 else 141 else
143 inode->i_fop = &gfs2_file_fops; 142 inode->i_fop = gfs2_file_fops;
144 } else if (S_ISDIR(mode)) { 143 } else if (S_ISDIR(mode)) {
145 inode->i_op = &gfs2_dir_iops; 144 inode->i_op = &gfs2_dir_iops;
146 if (sdp->sd_args.ar_localflocks) 145 if (gfs2_localflocks(sdp))
147 inode->i_fop = &gfs2_dir_fops_nolock; 146 inode->i_fop = gfs2_dir_fops_nolock;
148 else 147 else
149 inode->i_fop = &gfs2_dir_fops; 148 inode->i_fop = gfs2_dir_fops;
150 } else if (S_ISLNK(mode)) { 149 } else if (S_ISLNK(mode)) {
151 inode->i_op = &gfs2_symlink_iops; 150 inode->i_op = &gfs2_symlink_iops;
152 } else { 151 } else {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d5329364cdff..dca4fee3078b 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -101,12 +101,26 @@ void gfs2_dinode_print(const struct gfs2_inode *ip);
101extern const struct inode_operations gfs2_file_iops; 101extern const struct inode_operations gfs2_file_iops;
102extern const struct inode_operations gfs2_dir_iops; 102extern const struct inode_operations gfs2_dir_iops;
103extern const struct inode_operations gfs2_symlink_iops; 103extern const struct inode_operations gfs2_symlink_iops;
104extern const struct file_operations gfs2_file_fops; 104extern const struct file_operations *gfs2_file_fops_nolock;
105extern const struct file_operations gfs2_dir_fops; 105extern const struct file_operations *gfs2_dir_fops_nolock;
106extern const struct file_operations gfs2_file_fops_nolock;
107extern const struct file_operations gfs2_dir_fops_nolock;
108 106
109extern void gfs2_set_inode_flags(struct inode *inode); 107extern void gfs2_set_inode_flags(struct inode *inode);
108
109#ifdef CONFIG_GFS2_FS_LOCKING_DLM
110extern const struct file_operations *gfs2_file_fops;
111extern const struct file_operations *gfs2_dir_fops;
112static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
113{
114 return sdp->sd_args.ar_localflocks;
115}
116#else /* Single node only */
117#define gfs2_file_fops NULL
118#define gfs2_dir_fops NULL
119static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
120{
121 return 1;
122}
123#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
110 124
111#endif /* __INODE_DOT_H__ */ 125#endif /* __INODE_DOT_H__ */
112 126
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
new file mode 100644
index 000000000000..46df988323bc
--- /dev/null
+++ b/fs/gfs2/lock_dlm.c
@@ -0,0 +1,241 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/fs.h>
11#include <linux/dlm.h>
12#include <linux/types.h>
13#include <linux/gfs2_ondisk.h>
14
15#include "incore.h"
16#include "glock.h"
17#include "util.h"
18
19
20static void gdlm_ast(void *arg)
21{
22 struct gfs2_glock *gl = arg;
23 unsigned ret = gl->gl_state;
24
25 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
26
27 if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
28 memset(gl->gl_lvb, 0, GDLM_LVB_SIZE);
29
30 switch (gl->gl_lksb.sb_status) {
31 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
32 kmem_cache_free(gfs2_glock_cachep, gl);
33 return;
34 case -DLM_ECANCEL: /* Cancel while getting lock */
35 ret |= LM_OUT_CANCELED;
36 goto out;
37 case -EAGAIN: /* Try lock fails */
38 goto out;
39 case -EINVAL: /* Invalid */
40 case -ENOMEM: /* Out of memory */
41 ret |= LM_OUT_ERROR;
42 goto out;
43 case 0: /* Success */
44 break;
45 default: /* Something unexpected */
46 BUG();
47 }
48
49 ret = gl->gl_req;
50 if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
51 if (gl->gl_req == LM_ST_SHARED)
52 ret = LM_ST_DEFERRED;
53 else if (gl->gl_req == LM_ST_DEFERRED)
54 ret = LM_ST_SHARED;
55 else
56 BUG();
57 }
58
59 set_bit(GLF_INITIAL, &gl->gl_flags);
60 gfs2_glock_complete(gl, ret);
61 return;
62out:
63 if (!test_bit(GLF_INITIAL, &gl->gl_flags))
64 gl->gl_lksb.sb_lkid = 0;
65 gfs2_glock_complete(gl, ret);
66}
67
68static void gdlm_bast(void *arg, int mode)
69{
70 struct gfs2_glock *gl = arg;
71
72 switch (mode) {
73 case DLM_LOCK_EX:
74 gfs2_glock_cb(gl, LM_ST_UNLOCKED);
75 break;
76 case DLM_LOCK_CW:
77 gfs2_glock_cb(gl, LM_ST_DEFERRED);
78 break;
79 case DLM_LOCK_PR:
80 gfs2_glock_cb(gl, LM_ST_SHARED);
81 break;
82 default:
83 printk(KERN_ERR "unknown bast mode %d", mode);
84 BUG();
85 }
86}
87
88/* convert gfs lock-state to dlm lock-mode */
89
90static int make_mode(const unsigned int lmstate)
91{
92 switch (lmstate) {
93 case LM_ST_UNLOCKED:
94 return DLM_LOCK_NL;
95 case LM_ST_EXCLUSIVE:
96 return DLM_LOCK_EX;
97 case LM_ST_DEFERRED:
98 return DLM_LOCK_CW;
99 case LM_ST_SHARED:
100 return DLM_LOCK_PR;
101 }
102 printk(KERN_ERR "unknown LM state %d", lmstate);
103 BUG();
104 return -1;
105}
106
107static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
108 const int req)
109{
110 u32 lkf = 0;
111
112 if (gfs_flags & LM_FLAG_TRY)
113 lkf |= DLM_LKF_NOQUEUE;
114
115 if (gfs_flags & LM_FLAG_TRY_1CB) {
116 lkf |= DLM_LKF_NOQUEUE;
117 lkf |= DLM_LKF_NOQUEUEBAST;
118 }
119
120 if (gfs_flags & LM_FLAG_PRIORITY) {
121 lkf |= DLM_LKF_NOORDER;
122 lkf |= DLM_LKF_HEADQUE;
123 }
124
125 if (gfs_flags & LM_FLAG_ANY) {
126 if (req == DLM_LOCK_PR)
127 lkf |= DLM_LKF_ALTCW;
128 else if (req == DLM_LOCK_CW)
129 lkf |= DLM_LKF_ALTPR;
130 else
131 BUG();
132 }
133
134 if (lkid != 0)
135 lkf |= DLM_LKF_CONVERT;
136
137 lkf |= DLM_LKF_VALBLK;
138
139 return lkf;
140}
141
142static unsigned int gdlm_lock(struct gfs2_glock *gl,
143 unsigned int req_state, unsigned int flags)
144{
145 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
146 int error;
147 int req;
148 u32 lkf;
149
150 gl->gl_req = req_state;
151 req = make_mode(req_state);
152 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
153
154 /*
155 * Submit the actual lock request.
156 */
157
158 error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
159 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
160 if (error == -EAGAIN)
161 return 0;
162 if (error)
163 return LM_OUT_ERROR;
164 return LM_OUT_ASYNC;
165}
166
167static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
168{
169 struct gfs2_glock *gl = ptr;
170 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
171 int error;
172
173 if (gl->gl_lksb.sb_lkid == 0) {
174 kmem_cache_free(cachep, gl);
175 return;
176 }
177
178 error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
179 NULL, gl);
180 if (error) {
181 printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n",
182 gl->gl_name.ln_type,
183 (unsigned long long)gl->gl_name.ln_number, error);
184 return;
185 }
186}
187
188static void gdlm_cancel(struct gfs2_glock *gl)
189{
190 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
191 dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
192}
193
194static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
195{
196 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
197 int error;
198
199 if (fsname == NULL) {
200 fs_info(sdp, "no fsname found\n");
201 return -EINVAL;
202 }
203
204 error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm,
205 DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
206 (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
207 GDLM_LVB_SIZE);
208 if (error)
209 printk(KERN_ERR "dlm_new_lockspace error %d", error);
210
211 return error;
212}
213
214static void gdlm_unmount(struct gfs2_sbd *sdp)
215{
216 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
217
218 if (ls->ls_dlm) {
219 dlm_release_lockspace(ls->ls_dlm, 2);
220 ls->ls_dlm = NULL;
221 }
222}
223
224static const match_table_t dlm_tokens = {
225 { Opt_jid, "jid=%d"},
226 { Opt_id, "id=%d"},
227 { Opt_first, "first=%d"},
228 { Opt_nodir, "nodir=%d"},
229 { Opt_err, NULL },
230};
231
232const struct lm_lockops gfs2_dlm_ops = {
233 .lm_proto_name = "lock_dlm",
234 .lm_mount = gdlm_mount,
235 .lm_unmount = gdlm_unmount,
236 .lm_put_lock = gdlm_put_lock,
237 .lm_lock = gdlm_lock,
238 .lm_cancel = gdlm_cancel,
239 .lm_tokens = &dlm_tokens,
240};
241
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
deleted file mode 100644
index 523243a13a21..000000000000
--- a/fs/gfs2/locking.c
+++ /dev/null
@@ -1,232 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19#include <linux/lm_interface.h>
20
21struct lmh_wrapper {
22 struct list_head lw_list;
23 const struct lm_lockops *lw_ops;
24};
25
26static int nolock_mount(char *table_name, char *host_data,
27 lm_callback_t cb, void *cb_data,
28 unsigned int min_lvb_size, int flags,
29 struct lm_lockstruct *lockstruct,
30 struct kobject *fskobj);
31
32/* List of registered low-level locking protocols. A file system selects one
33 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
34
35static const struct lm_lockops nolock_ops = {
36 .lm_proto_name = "lock_nolock",
37 .lm_mount = nolock_mount,
38};
39
40static struct lmh_wrapper nolock_proto = {
41 .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list),
42 .lw_ops = &nolock_ops,
43};
44
45static LIST_HEAD(lmh_list);
46static DEFINE_MUTEX(lmh_lock);
47
48static int nolock_mount(char *table_name, char *host_data,
49 lm_callback_t cb, void *cb_data,
50 unsigned int min_lvb_size, int flags,
51 struct lm_lockstruct *lockstruct,
52 struct kobject *fskobj)
53{
54 char *c;
55 unsigned int jid;
56
57 c = strstr(host_data, "jid=");
58 if (!c)
59 jid = 0;
60 else {
61 c += 4;
62 sscanf(c, "%u", &jid);
63 }
64
65 lockstruct->ls_jid = jid;
66 lockstruct->ls_first = 1;
67 lockstruct->ls_lvb_size = min_lvb_size;
68 lockstruct->ls_ops = &nolock_ops;
69 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
70
71 return 0;
72}
73
74/**
75 * gfs2_register_lockproto - Register a low-level locking protocol
76 * @proto: the protocol definition
77 *
78 * Returns: 0 on success, -EXXX on failure
79 */
80
81int gfs2_register_lockproto(const struct lm_lockops *proto)
82{
83 struct lmh_wrapper *lw;
84
85 mutex_lock(&lmh_lock);
86
87 list_for_each_entry(lw, &lmh_list, lw_list) {
88 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
89 mutex_unlock(&lmh_lock);
90 printk(KERN_INFO "GFS2: protocol %s already exists\n",
91 proto->lm_proto_name);
92 return -EEXIST;
93 }
94 }
95
96 lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
97 if (!lw) {
98 mutex_unlock(&lmh_lock);
99 return -ENOMEM;
100 }
101
102 lw->lw_ops = proto;
103 list_add(&lw->lw_list, &lmh_list);
104
105 mutex_unlock(&lmh_lock);
106
107 return 0;
108}
109
110/**
111 * gfs2_unregister_lockproto - Unregister a low-level locking protocol
112 * @proto: the protocol definition
113 *
114 */
115
116void gfs2_unregister_lockproto(const struct lm_lockops *proto)
117{
118 struct lmh_wrapper *lw;
119
120 mutex_lock(&lmh_lock);
121
122 list_for_each_entry(lw, &lmh_list, lw_list) {
123 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
124 list_del(&lw->lw_list);
125 mutex_unlock(&lmh_lock);
126 kfree(lw);
127 return;
128 }
129 }
130
131 mutex_unlock(&lmh_lock);
132
133 printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
134 proto->lm_proto_name);
135}
136
137/**
138 * gfs2_mount_lockproto - Mount a lock protocol
139 * @proto_name - the name of the protocol
140 * @table_name - the name of the lock space
141 * @host_data - data specific to this host
142 * @cb - the callback to the code using the lock module
143 * @sdp - The GFS2 superblock
144 * @min_lvb_size - the minimum LVB size that the caller can deal with
145 * @flags - LM_MFLAG_*
146 * @lockstruct - a structure returned describing the mount
147 *
148 * Returns: 0 on success, -EXXX on failure
149 */
150
151int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
152 lm_callback_t cb, void *cb_data,
153 unsigned int min_lvb_size, int flags,
154 struct lm_lockstruct *lockstruct,
155 struct kobject *fskobj)
156{
157 struct lmh_wrapper *lw = NULL;
158 int try = 0;
159 int error, found;
160
161
162retry:
163 mutex_lock(&lmh_lock);
164
165 if (list_empty(&nolock_proto.lw_list))
166 list_add(&nolock_proto.lw_list, &lmh_list);
167
168 found = 0;
169 list_for_each_entry(lw, &lmh_list, lw_list) {
170 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
171 found = 1;
172 break;
173 }
174 }
175
176 if (!found) {
177 if (!try && capable(CAP_SYS_MODULE)) {
178 try = 1;
179 mutex_unlock(&lmh_lock);
180 request_module(proto_name);
181 goto retry;
182 }
183 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
184 error = -ENOENT;
185 goto out;
186 }
187
188 if (lw->lw_ops->lm_owner &&
189 !try_module_get(lw->lw_ops->lm_owner)) {
190 try = 0;
191 mutex_unlock(&lmh_lock);
192 msleep(1000);
193 goto retry;
194 }
195
196 error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
197 min_lvb_size, flags, lockstruct, fskobj);
198 if (error)
199 module_put(lw->lw_ops->lm_owner);
200out:
201 mutex_unlock(&lmh_lock);
202 return error;
203}
204
205void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
206{
207 mutex_lock(&lmh_lock);
208 if (lockstruct->ls_ops->lm_unmount)
209 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
210 if (lockstruct->ls_ops->lm_owner)
211 module_put(lockstruct->ls_ops->lm_owner);
212 mutex_unlock(&lmh_lock);
213}
214
215/**
216 * gfs2_withdraw_lockproto - abnormally unmount a lock module
217 * @lockstruct: the lockstruct passed into mount
218 *
219 */
220
221void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
222{
223 mutex_lock(&lmh_lock);
224 lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
225 if (lockstruct->ls_ops->lm_owner)
226 module_put(lockstruct->ls_ops->lm_owner);
227 mutex_unlock(&lmh_lock);
228}
229
230EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
231EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);
232
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
deleted file mode 100644
index 2609bb6cd013..000000000000
--- a/fs/gfs2/locking/dlm/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
1obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
2lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o
3
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
deleted file mode 100644
index 2482c9047505..000000000000
--- a/fs/gfs2/locking/dlm/lock.c
+++ /dev/null
@@ -1,708 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12static char junk_lvb[GDLM_LVB_SIZE];
13
14
15/* convert dlm lock-mode to gfs lock-state */
16
17static s16 gdlm_make_lmstate(s16 dlmmode)
18{
19 switch (dlmmode) {
20 case DLM_LOCK_IV:
21 case DLM_LOCK_NL:
22 return LM_ST_UNLOCKED;
23 case DLM_LOCK_EX:
24 return LM_ST_EXCLUSIVE;
25 case DLM_LOCK_CW:
26 return LM_ST_DEFERRED;
27 case DLM_LOCK_PR:
28 return LM_ST_SHARED;
29 }
30 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
31 return -1;
32}
33
34/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
35 thread gets to it. */
36
37static void queue_submit(struct gdlm_lock *lp)
38{
39 struct gdlm_ls *ls = lp->ls;
40
41 spin_lock(&ls->async_lock);
42 list_add_tail(&lp->delay_list, &ls->submit);
43 spin_unlock(&ls->async_lock);
44 wake_up(&ls->thread_wait);
45}
46
47static void wake_up_ast(struct gdlm_lock *lp)
48{
49 clear_bit(LFL_AST_WAIT, &lp->flags);
50 smp_mb__after_clear_bit();
51 wake_up_bit(&lp->flags, LFL_AST_WAIT);
52}
53
54static void gdlm_delete_lp(struct gdlm_lock *lp)
55{
56 struct gdlm_ls *ls = lp->ls;
57
58 spin_lock(&ls->async_lock);
59 if (!list_empty(&lp->delay_list))
60 list_del_init(&lp->delay_list);
61 ls->all_locks_count--;
62 spin_unlock(&ls->async_lock);
63
64 kfree(lp);
65}
66
67static void gdlm_queue_delayed(struct gdlm_lock *lp)
68{
69 struct gdlm_ls *ls = lp->ls;
70
71 spin_lock(&ls->async_lock);
72 list_add_tail(&lp->delay_list, &ls->delayed);
73 spin_unlock(&ls->async_lock);
74}
75
76static void process_complete(struct gdlm_lock *lp)
77{
78 struct gdlm_ls *ls = lp->ls;
79 struct lm_async_cb acb;
80
81 memset(&acb, 0, sizeof(acb));
82
83 if (lp->lksb.sb_status == -DLM_ECANCEL) {
84 log_info("complete dlm cancel %x,%llx flags %lx",
85 lp->lockname.ln_type,
86 (unsigned long long)lp->lockname.ln_number,
87 lp->flags);
88
89 lp->req = lp->cur;
90 acb.lc_ret |= LM_OUT_CANCELED;
91 if (lp->cur == DLM_LOCK_IV)
92 lp->lksb.sb_lkid = 0;
93 goto out;
94 }
95
96 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
97 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
98 log_info("unlock sb_status %d %x,%llx flags %lx",
99 lp->lksb.sb_status, lp->lockname.ln_type,
100 (unsigned long long)lp->lockname.ln_number,
101 lp->flags);
102 return;
103 }
104
105 lp->cur = DLM_LOCK_IV;
106 lp->req = DLM_LOCK_IV;
107 lp->lksb.sb_lkid = 0;
108
109 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
110 gdlm_delete_lp(lp);
111 return;
112 }
113 goto out;
114 }
115
116 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
117 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
118
119 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
120 if (lp->req == DLM_LOCK_PR)
121 lp->req = DLM_LOCK_CW;
122 else if (lp->req == DLM_LOCK_CW)
123 lp->req = DLM_LOCK_PR;
124 }
125
126 /*
127 * A canceled lock request. The lock was just taken off the delayed
128 * list and was never even submitted to dlm.
129 */
130
131 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
132 log_info("complete internal cancel %x,%llx",
133 lp->lockname.ln_type,
134 (unsigned long long)lp->lockname.ln_number);
135 lp->req = lp->cur;
136 acb.lc_ret |= LM_OUT_CANCELED;
137 goto out;
138 }
139
140 /*
141 * An error occurred.
142 */
143
144 if (lp->lksb.sb_status) {
145 /* a "normal" error */
146 if ((lp->lksb.sb_status == -EAGAIN) &&
147 (lp->lkf & DLM_LKF_NOQUEUE)) {
148 lp->req = lp->cur;
149 if (lp->cur == DLM_LOCK_IV)
150 lp->lksb.sb_lkid = 0;
151 goto out;
152 }
153
154 /* this could only happen with cancels I think */
155 log_info("ast sb_status %d %x,%llx flags %lx",
156 lp->lksb.sb_status, lp->lockname.ln_type,
157 (unsigned long long)lp->lockname.ln_number,
158 lp->flags);
159 return;
160 }
161
162 /*
163 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
164 */
165
166 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
167 wake_up_ast(lp);
168 return;
169 }
170
171 /*
172 * A lock has been demoted to NL because it initially completed during
173 * BLOCK_LOCKS. Now it must be requested in the originally requested
174 * mode.
175 */
176
177 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
178 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
179 lp->lockname.ln_type,
180 (unsigned long long)lp->lockname.ln_number);
181 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
182 lp->lockname.ln_type,
183 (unsigned long long)lp->lockname.ln_number);
184
185 lp->cur = DLM_LOCK_NL;
186 lp->req = lp->prev_req;
187 lp->prev_req = DLM_LOCK_IV;
188 lp->lkf &= ~DLM_LKF_CONVDEADLK;
189
190 set_bit(LFL_NOCACHE, &lp->flags);
191
192 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
193 !test_bit(LFL_NOBLOCK, &lp->flags))
194 gdlm_queue_delayed(lp);
195 else
196 queue_submit(lp);
197 return;
198 }
199
200 /*
201 * A request is granted during dlm recovery. It may be granted
202 * because the locks of a failed node were cleared. In that case,
203 * there may be inconsistent data beneath this lock and we must wait
204 * for recovery to complete to use it. When gfs recovery is done this
205 * granted lock will be converted to NL and then reacquired in this
206 * granted state.
207 */
208
209 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
210 !test_bit(LFL_NOBLOCK, &lp->flags) &&
211 lp->req != DLM_LOCK_NL) {
212
213 lp->cur = lp->req;
214 lp->prev_req = lp->req;
215 lp->req = DLM_LOCK_NL;
216 lp->lkf |= DLM_LKF_CONVERT;
217 lp->lkf &= ~DLM_LKF_CONVDEADLK;
218
219 log_debug("rereq %x,%llx id %x %d,%d",
220 lp->lockname.ln_type,
221 (unsigned long long)lp->lockname.ln_number,
222 lp->lksb.sb_lkid, lp->cur, lp->req);
223
224 set_bit(LFL_REREQUEST, &lp->flags);
225 queue_submit(lp);
226 return;
227 }
228
229 /*
230 * DLM demoted the lock to NL before it was granted so GFS must be
231 * told it cannot cache data for this lock.
232 */
233
234 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
235 set_bit(LFL_NOCACHE, &lp->flags);
236
237out:
238 /*
239 * This is an internal lock_dlm lock
240 */
241
242 if (test_bit(LFL_INLOCK, &lp->flags)) {
243 clear_bit(LFL_NOBLOCK, &lp->flags);
244 lp->cur = lp->req;
245 wake_up_ast(lp);
246 return;
247 }
248
249 /*
250 * Normal completion of a lock request. Tell GFS it now has the lock.
251 */
252
253 clear_bit(LFL_NOBLOCK, &lp->flags);
254 lp->cur = lp->req;
255
256 acb.lc_name = lp->lockname;
257 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
258
259 ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
260}
261
262static void gdlm_ast(void *astarg)
263{
264 struct gdlm_lock *lp = astarg;
265 clear_bit(LFL_ACTIVE, &lp->flags);
266 process_complete(lp);
267}
268
269static void process_blocking(struct gdlm_lock *lp, int bast_mode)
270{
271 struct gdlm_ls *ls = lp->ls;
272 unsigned int cb = 0;
273
274 switch (gdlm_make_lmstate(bast_mode)) {
275 case LM_ST_EXCLUSIVE:
276 cb = LM_CB_NEED_E;
277 break;
278 case LM_ST_DEFERRED:
279 cb = LM_CB_NEED_D;
280 break;
281 case LM_ST_SHARED:
282 cb = LM_CB_NEED_S;
283 break;
284 default:
285 gdlm_assert(0, "unknown bast mode %u", bast_mode);
286 }
287
288 ls->fscb(ls->sdp, cb, &lp->lockname);
289}
290
291
292static void gdlm_bast(void *astarg, int mode)
293{
294 struct gdlm_lock *lp = astarg;
295
296 if (!mode) {
297 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
298 lp->lockname.ln_type,
299 (unsigned long long)lp->lockname.ln_number);
300 return;
301 }
302
303 process_blocking(lp, mode);
304}
305
306/* convert gfs lock-state to dlm lock-mode */
307
308static s16 make_mode(s16 lmstate)
309{
310 switch (lmstate) {
311 case LM_ST_UNLOCKED:
312 return DLM_LOCK_NL;
313 case LM_ST_EXCLUSIVE:
314 return DLM_LOCK_EX;
315 case LM_ST_DEFERRED:
316 return DLM_LOCK_CW;
317 case LM_ST_SHARED:
318 return DLM_LOCK_PR;
319 }
320 gdlm_assert(0, "unknown LM state %d", lmstate);
321 return -1;
322}
323
324
325/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
326 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
327
328static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
329{
330 s16 cur = make_mode(cur_state);
331 if (lp->cur != DLM_LOCK_IV)
332 gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
333}
334
335static inline unsigned int make_flags(struct gdlm_lock *lp,
336 unsigned int gfs_flags,
337 s16 cur, s16 req)
338{
339 unsigned int lkf = 0;
340
341 if (gfs_flags & LM_FLAG_TRY)
342 lkf |= DLM_LKF_NOQUEUE;
343
344 if (gfs_flags & LM_FLAG_TRY_1CB) {
345 lkf |= DLM_LKF_NOQUEUE;
346 lkf |= DLM_LKF_NOQUEUEBAST;
347 }
348
349 if (gfs_flags & LM_FLAG_PRIORITY) {
350 lkf |= DLM_LKF_NOORDER;
351 lkf |= DLM_LKF_HEADQUE;
352 }
353
354 if (gfs_flags & LM_FLAG_ANY) {
355 if (req == DLM_LOCK_PR)
356 lkf |= DLM_LKF_ALTCW;
357 else if (req == DLM_LOCK_CW)
358 lkf |= DLM_LKF_ALTPR;
359 }
360
361 if (lp->lksb.sb_lkid != 0) {
362 lkf |= DLM_LKF_CONVERT;
363 }
364
365 if (lp->lvb)
366 lkf |= DLM_LKF_VALBLK;
367
368 return lkf;
369}
370
371/* make_strname - convert GFS lock numbers to a string */
372
373static inline void make_strname(const struct lm_lockname *lockname,
374 struct gdlm_strname *str)
375{
376 sprintf(str->name, "%8x%16llx", lockname->ln_type,
377 (unsigned long long)lockname->ln_number);
378 str->namelen = GDLM_STRNAME_BYTES;
379}
380
381static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
382 struct gdlm_lock **lpp)
383{
384 struct gdlm_lock *lp;
385
386 lp = kzalloc(sizeof(struct gdlm_lock), GFP_NOFS);
387 if (!lp)
388 return -ENOMEM;
389
390 lp->lockname = *name;
391 make_strname(name, &lp->strname);
392 lp->ls = ls;
393 lp->cur = DLM_LOCK_IV;
394 INIT_LIST_HEAD(&lp->delay_list);
395
396 spin_lock(&ls->async_lock);
397 ls->all_locks_count++;
398 spin_unlock(&ls->async_lock);
399
400 *lpp = lp;
401 return 0;
402}
403
404int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
405 void **lockp)
406{
407 struct gdlm_lock *lp;
408 int error;
409
410 error = gdlm_create_lp(lockspace, name, &lp);
411
412 *lockp = lp;
413 return error;
414}
415
416void gdlm_put_lock(void *lock)
417{
418 gdlm_delete_lp(lock);
419}
420
421unsigned int gdlm_do_lock(struct gdlm_lock *lp)
422{
423 struct gdlm_ls *ls = lp->ls;
424 int error, bast = 1;
425
426 /*
427 * When recovery is in progress, delay lock requests for submission
428 * once recovery is done. Requests for recovery (NOEXP) and unlocks
429 * can pass.
430 */
431
432 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
433 !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
434 gdlm_queue_delayed(lp);
435 return LM_OUT_ASYNC;
436 }
437
438 /*
439 * Submit the actual lock request.
440 */
441
442 if (test_bit(LFL_NOBAST, &lp->flags))
443 bast = 0;
444
445 set_bit(LFL_ACTIVE, &lp->flags);
446
447 log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
448 (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
449 lp->cur, lp->req, lp->lkf);
450
451 error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
452 lp->strname.name, lp->strname.namelen, 0, gdlm_ast,
453 lp, bast ? gdlm_bast : NULL);
454
455 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
456 lp->lksb.sb_status = -EAGAIN;
457 gdlm_ast(lp);
458 error = 0;
459 }
460
461 if (error) {
462 log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
463 "flags=%lx", ls->fsname, lp->lockname.ln_type,
464 (unsigned long long)lp->lockname.ln_number, error,
465 lp->cur, lp->req, lp->lkf, lp->flags);
466 return LM_OUT_ERROR;
467 }
468 return LM_OUT_ASYNC;
469}
470
471static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
472{
473 struct gdlm_ls *ls = lp->ls;
474 unsigned int lkf = 0;
475 int error;
476
477 set_bit(LFL_DLM_UNLOCK, &lp->flags);
478 set_bit(LFL_ACTIVE, &lp->flags);
479
480 if (lp->lvb)
481 lkf = DLM_LKF_VALBLK;
482
483 log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
484 (unsigned long long)lp->lockname.ln_number,
485 lp->lksb.sb_lkid, lp->cur, lkf);
486
487 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
488
489 if (error) {
490 log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
491 "flags=%lx", ls->fsname, lp->lockname.ln_type,
492 (unsigned long long)lp->lockname.ln_number, error,
493 lp->cur, lp->req, lp->lkf, lp->flags);
494 return LM_OUT_ERROR;
495 }
496 return LM_OUT_ASYNC;
497}
498
499unsigned int gdlm_lock(void *lock, unsigned int cur_state,
500 unsigned int req_state, unsigned int flags)
501{
502 struct gdlm_lock *lp = lock;
503
504 if (req_state == LM_ST_UNLOCKED)
505 return gdlm_unlock(lock, cur_state);
506
507 if (req_state == LM_ST_UNLOCKED)
508 return gdlm_unlock(lock, cur_state);
509
510 clear_bit(LFL_DLM_CANCEL, &lp->flags);
511 if (flags & LM_FLAG_NOEXP)
512 set_bit(LFL_NOBLOCK, &lp->flags);
513
514 check_cur_state(lp, cur_state);
515 lp->req = make_mode(req_state);
516 lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
517
518 return gdlm_do_lock(lp);
519}
520
521unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
522{
523 struct gdlm_lock *lp = lock;
524
525 clear_bit(LFL_DLM_CANCEL, &lp->flags);
526 if (lp->cur == DLM_LOCK_IV)
527 return 0;
528 return gdlm_do_unlock(lp);
529}
530
531void gdlm_cancel(void *lock)
532{
533 struct gdlm_lock *lp = lock;
534 struct gdlm_ls *ls = lp->ls;
535 int error, delay_list = 0;
536
537 if (test_bit(LFL_DLM_CANCEL, &lp->flags))
538 return;
539
540 log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
541 (unsigned long long)lp->lockname.ln_number, lp->flags);
542
543 spin_lock(&ls->async_lock);
544 if (!list_empty(&lp->delay_list)) {
545 list_del_init(&lp->delay_list);
546 delay_list = 1;
547 }
548 spin_unlock(&ls->async_lock);
549
550 if (delay_list) {
551 set_bit(LFL_CANCEL, &lp->flags);
552 set_bit(LFL_ACTIVE, &lp->flags);
553 gdlm_ast(lp);
554 return;
555 }
556
557 if (!test_bit(LFL_ACTIVE, &lp->flags) ||
558 test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
559 log_info("gdlm_cancel skip %x,%llx flags %lx",
560 lp->lockname.ln_type,
561 (unsigned long long)lp->lockname.ln_number, lp->flags);
562 return;
563 }
564
565 /* the lock is blocked in the dlm */
566
567 set_bit(LFL_DLM_CANCEL, &lp->flags);
568 set_bit(LFL_ACTIVE, &lp->flags);
569
570 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
571 NULL, lp);
572
573 log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
574 lp->lockname.ln_type,
575 (unsigned long long)lp->lockname.ln_number, lp->flags);
576
577 if (error == -EBUSY)
578 clear_bit(LFL_DLM_CANCEL, &lp->flags);
579}
580
581static int gdlm_add_lvb(struct gdlm_lock *lp)
582{
583 char *lvb;
584
585 lvb = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
586 if (!lvb)
587 return -ENOMEM;
588
589 lp->lksb.sb_lvbptr = lvb;
590 lp->lvb = lvb;
591 return 0;
592}
593
594static void gdlm_del_lvb(struct gdlm_lock *lp)
595{
596 kfree(lp->lvb);
597 lp->lvb = NULL;
598 lp->lksb.sb_lvbptr = NULL;
599}
600
601static int gdlm_ast_wait(void *word)
602{
603 schedule();
604 return 0;
605}
606
607/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
608 the completion) because gfs won't call hold_lvb() during a callback (from
609 the context of a lock_dlm thread). */
610
611static int hold_null_lock(struct gdlm_lock *lp)
612{
613 struct gdlm_lock *lpn = NULL;
614 int error;
615
616 if (lp->hold_null) {
617 printk(KERN_INFO "lock_dlm: lvb already held\n");
618 return 0;
619 }
620
621 error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
622 if (error)
623 goto out;
624
625 lpn->lksb.sb_lvbptr = junk_lvb;
626 lpn->lvb = junk_lvb;
627
628 lpn->req = DLM_LOCK_NL;
629 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
630 set_bit(LFL_NOBAST, &lpn->flags);
631 set_bit(LFL_INLOCK, &lpn->flags);
632 set_bit(LFL_AST_WAIT, &lpn->flags);
633
634 gdlm_do_lock(lpn);
635 wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
636 error = lpn->lksb.sb_status;
637 if (error) {
638 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
639 error);
640 gdlm_delete_lp(lpn);
641 lpn = NULL;
642 }
643out:
644 lp->hold_null = lpn;
645 return error;
646}
647
648/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
649 the completion) because gfs may call unhold_lvb() during a callback (from
650 the context of a lock_dlm thread) which could cause a deadlock since the
651 other lock_dlm thread could be engaged in recovery. */
652
653static void unhold_null_lock(struct gdlm_lock *lp)
654{
655 struct gdlm_lock *lpn = lp->hold_null;
656
657 gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
658 (unsigned long long)lp->lockname.ln_number);
659 lpn->lksb.sb_lvbptr = NULL;
660 lpn->lvb = NULL;
661 set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
662 gdlm_do_unlock(lpn);
663 lp->hold_null = NULL;
664}
665
666/* Acquire a NL lock because gfs requires the value block to remain
667 intact on the resource while the lvb is "held" even if it's holding no locks
668 on the resource. */
669
670int gdlm_hold_lvb(void *lock, char **lvbp)
671{
672 struct gdlm_lock *lp = lock;
673 int error;
674
675 error = gdlm_add_lvb(lp);
676 if (error)
677 return error;
678
679 *lvbp = lp->lvb;
680
681 error = hold_null_lock(lp);
682 if (error)
683 gdlm_del_lvb(lp);
684
685 return error;
686}
687
688void gdlm_unhold_lvb(void *lock, char *lvb)
689{
690 struct gdlm_lock *lp = lock;
691
692 unhold_null_lock(lp);
693 gdlm_del_lvb(lp);
694}
695
696void gdlm_submit_delayed(struct gdlm_ls *ls)
697{
698 struct gdlm_lock *lp, *safe;
699
700 spin_lock(&ls->async_lock);
701 list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
702 list_del_init(&lp->delay_list);
703 list_add_tail(&lp->delay_list, &ls->submit);
704 }
705 spin_unlock(&ls->async_lock);
706 wake_up(&ls->thread_wait);
707}
708
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
deleted file mode 100644
index 3c98e7c6f93b..000000000000
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ /dev/null
@@ -1,166 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef LOCK_DLM_DOT_H
11#define LOCK_DLM_DOT_H
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/spinlock.h>
16#include <linux/types.h>
17#include <linux/string.h>
18#include <linux/list.h>
19#include <linux/socket.h>
20#include <linux/delay.h>
21#include <linux/kthread.h>
22#include <linux/kobject.h>
23#include <linux/fcntl.h>
24#include <linux/wait.h>
25#include <net/sock.h>
26
27#include <linux/dlm.h>
28#include <linux/dlm_plock.h>
29#include <linux/lm_interface.h>
30
31/*
32 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
33 * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
34 * as "lock_dlm".
35 */
36
#define GDLM_STRNAME_BYTES	24	/* size of gdlm_strname.name */
#define GDLM_LVB_SIZE		32	/* lvb size passed to dlm_new_lockspace() */
#define GDLM_DROP_COUNT		0
#define GDLM_DROP_PERIOD	60
#define GDLM_NAME_LEN		128	/* size of clustername[] and fsname[] */
42
43/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
44 We sprintf these numbers into a 24 byte string of hex values to make them
45 human-readable (to make debugging simpler.) */
46
47struct gdlm_strname {
48 unsigned char name[GDLM_STRNAME_BYTES];
49 unsigned short namelen;
50};
51
/* Bit numbers used in gdlm_ls.flags */
enum {
	DFL_BLOCK_LOCKS	= 0,	/* toggled through the sysfs "block" file */
	DFL_SPECTATOR	= 1,
	DFL_WITHDRAW	= 2,	/* set through the sysfs "withdraw" file */
};
57
58struct gdlm_ls {
59 u32 id;
60 int jid;
61 int first;
62 int first_done;
63 unsigned long flags;
64 struct kobject kobj;
65 char clustername[GDLM_NAME_LEN];
66 char fsname[GDLM_NAME_LEN];
67 int fsflags;
68 dlm_lockspace_t *dlm_lockspace;
69 lm_callback_t fscb;
70 struct gfs2_sbd *sdp;
71 int recover_jid;
72 int recover_jid_done;
73 int recover_jid_status;
74 spinlock_t async_lock;
75 struct list_head delayed;
76 struct list_head submit;
77 u32 all_locks_count;
78 wait_queue_head_t wait_control;
79 struct task_struct *thread;
80 wait_queue_head_t thread_wait;
81};
82
/* Bit numbers used in gdlm_lock.flags */
enum {
	LFL_NOBLOCK		= 0,
	LFL_NOCACHE		= 1,
	LFL_DLM_UNLOCK		= 2,
	LFL_DLM_CANCEL		= 3,
	LFL_SYNC_LVB		= 4,
	LFL_FORCE_PROMOTE	= 5,
	LFL_REREQUEST		= 6,
	LFL_ACTIVE		= 7,
	LFL_INLOCK		= 8,
	LFL_CANCEL		= 9,
	LFL_NOBAST		= 10,
	LFL_HEADQUE		= 11,
	LFL_UNLOCK_DELETE	= 12,
	LFL_AST_WAIT		= 13,
};
99
100struct gdlm_lock {
101 struct gdlm_ls *ls;
102 struct lm_lockname lockname;
103 struct gdlm_strname strname;
104 char *lvb;
105 struct dlm_lksb lksb;
106
107 s16 cur;
108 s16 req;
109 s16 prev_req;
110 u32 lkf; /* dlm flags DLM_LKF_ */
111 unsigned long flags; /* lock_dlm flags LFL_ */
112
113 struct list_head delay_list; /* delayed */
114 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
115};
116
/*
 * If @assertion is false, print the stringified assertion and a formatted
 * message at KERN_EMERG, then BUG().
 */
#define gdlm_assert(assertion, fmt, args...)				\
do {									\
	if (unlikely(!(assertion))) {					\
		printk(KERN_EMERG					\
		       "lock_dlm: fatal assertion failed \"%s\"\n"	\
		       "lock_dlm: " fmt "\n",				\
		       #assertion, ##args);				\
		BUG();							\
	}								\
} while (0)
126
/*
 * Logging helpers: each message is prefixed with "lock_dlm: " and ends with
 * a newline.  log_debug() expands to nothing unless LOCK_DLM_LOG_DEBUG is
 * defined.
 */
#define log_print(lev, fmt, arg...) \
	printk(lev "lock_dlm: " fmt "\n" , ## arg)
#define log_info(fmt, arg...)	log_print(KERN_INFO , fmt , ## arg)
#define log_error(fmt, arg...)	log_print(KERN_ERR , fmt , ## arg)
#ifdef LOCK_DLM_LOG_DEBUG
#define log_debug(fmt, arg...)	log_print(KERN_DEBUG , fmt , ## arg)
#else
#define log_debug(fmt, arg...)
#endif
135
/* sysfs.c */

int gdlm_sysfs_init(void);
void gdlm_sysfs_exit(void);
int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj);
void gdlm_kobject_release(struct gdlm_ls *ls);

/* thread.c */

int gdlm_init_threads(struct gdlm_ls *ls);
void gdlm_release_threads(struct gdlm_ls *ls);

/* lock.c */

void gdlm_submit_delayed(struct gdlm_ls *ls);
unsigned int gdlm_do_lock(struct gdlm_lock *lp);

int gdlm_get_lock(void *, struct lm_lockname *, void **);
void gdlm_put_lock(void *);
unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int);
unsigned int gdlm_unlock(void *, unsigned int);
void gdlm_cancel(void *);
int gdlm_hold_lvb(void *lock, char **lvbp);
void gdlm_unhold_lvb(void *lock, char *lvb);

/* mount.c */

extern const struct lm_lockops gdlm_ops;
164
165#endif
166
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
deleted file mode 100644
index b9a03a7ff801..000000000000
--- a/fs/gfs2/locking/dlm/main.c
+++ /dev/null
@@ -1,48 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/init.h>
11
12#include "lock_dlm.h"
13
14static int __init init_lock_dlm(void)
15{
16 int error;
17
18 error = gfs2_register_lockproto(&gdlm_ops);
19 if (error) {
20 printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
21 error);
22 return error;
23 }
24
25 error = gdlm_sysfs_init();
26 if (error) {
27 gfs2_unregister_lockproto(&gdlm_ops);
28 return error;
29 }
30
31 printk(KERN_INFO
32 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
33 return 0;
34}
35
36static void __exit exit_lock_dlm(void)
37{
38 gdlm_sysfs_exit();
39 gfs2_unregister_lockproto(&gdlm_ops);
40}
41
42module_init(init_lock_dlm);
43module_exit(exit_lock_dlm);
44
45MODULE_DESCRIPTION("GFS DLM Locking Module");
46MODULE_AUTHOR("Red Hat, Inc.");
47MODULE_LICENSE("GPL");
48
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
deleted file mode 100644
index 1aa7eb6a0226..000000000000
--- a/fs/gfs2/locking/dlm/mount.c
+++ /dev/null
@@ -1,276 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12const struct lm_lockops gdlm_ops;
13
14
15static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
16 int flags, char *table_name)
17{
18 struct gdlm_ls *ls;
19 char buf[256], *p;
20
21 ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
22 if (!ls)
23 return NULL;
24
25 ls->fscb = cb;
26 ls->sdp = sdp;
27 ls->fsflags = flags;
28 spin_lock_init(&ls->async_lock);
29 INIT_LIST_HEAD(&ls->delayed);
30 INIT_LIST_HEAD(&ls->submit);
31 init_waitqueue_head(&ls->thread_wait);
32 init_waitqueue_head(&ls->wait_control);
33 ls->jid = -1;
34
35 strncpy(buf, table_name, 256);
36 buf[255] = '\0';
37
38 p = strchr(buf, ':');
39 if (!p) {
40 log_info("invalid table_name \"%s\"", table_name);
41 kfree(ls);
42 return NULL;
43 }
44 *p = '\0';
45 p++;
46
47 strncpy(ls->clustername, buf, GDLM_NAME_LEN);
48 strncpy(ls->fsname, p, GDLM_NAME_LEN);
49
50 return ls;
51}
52
53static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
54{
55 char data[256];
56 char *options, *x, *y;
57 int error = 0;
58
59 memset(data, 0, 256);
60 strncpy(data, data_arg, 255);
61
62 if (!strlen(data)) {
63 log_error("no mount options, (u)mount helpers not installed");
64 return -EINVAL;
65 }
66
67 for (options = data; (x = strsep(&options, ":")); ) {
68 if (!*x)
69 continue;
70
71 y = strchr(x, '=');
72 if (y)
73 *y++ = 0;
74
75 if (!strcmp(x, "jid")) {
76 if (!y) {
77 log_error("need argument to jid");
78 error = -EINVAL;
79 break;
80 }
81 sscanf(y, "%u", &ls->jid);
82
83 } else if (!strcmp(x, "first")) {
84 if (!y) {
85 log_error("need argument to first");
86 error = -EINVAL;
87 break;
88 }
89 sscanf(y, "%u", &ls->first);
90
91 } else if (!strcmp(x, "id")) {
92 if (!y) {
93 log_error("need argument to id");
94 error = -EINVAL;
95 break;
96 }
97 sscanf(y, "%u", &ls->id);
98
99 } else if (!strcmp(x, "nodir")) {
100 if (!y) {
101 log_error("need argument to nodir");
102 error = -EINVAL;
103 break;
104 }
105 sscanf(y, "%u", nodir);
106
107 } else {
108 log_error("unkonwn option: %s", x);
109 error = -EINVAL;
110 break;
111 }
112 }
113
114 return error;
115}
116
117static int gdlm_mount(char *table_name, char *host_data,
118 lm_callback_t cb, void *cb_data,
119 unsigned int min_lvb_size, int flags,
120 struct lm_lockstruct *lockstruct,
121 struct kobject *fskobj)
122{
123 struct gdlm_ls *ls;
124 int error = -ENOMEM, nodir = 0;
125
126 if (min_lvb_size > GDLM_LVB_SIZE)
127 goto out;
128
129 ls = init_gdlm(cb, cb_data, flags, table_name);
130 if (!ls)
131 goto out;
132
133 error = make_args(ls, host_data, &nodir);
134 if (error)
135 goto out;
136
137 error = gdlm_init_threads(ls);
138 if (error)
139 goto out_free;
140
141 error = gdlm_kobject_setup(ls, fskobj);
142 if (error)
143 goto out_thread;
144
145 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
146 &ls->dlm_lockspace,
147 DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
148 (nodir ? DLM_LSFL_NODIR : 0),
149 GDLM_LVB_SIZE);
150 if (error) {
151 log_error("dlm_new_lockspace error %d", error);
152 goto out_kobj;
153 }
154
155 lockstruct->ls_jid = ls->jid;
156 lockstruct->ls_first = ls->first;
157 lockstruct->ls_lockspace = ls;
158 lockstruct->ls_ops = &gdlm_ops;
159 lockstruct->ls_flags = 0;
160 lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
161 return 0;
162
163out_kobj:
164 gdlm_kobject_release(ls);
165out_thread:
166 gdlm_release_threads(ls);
167out_free:
168 kfree(ls);
169out:
170 return error;
171}
172
173static void gdlm_unmount(void *lockspace)
174{
175 struct gdlm_ls *ls = lockspace;
176
177 log_debug("unmount flags %lx", ls->flags);
178
179 /* FIXME: serialize unmount and withdraw in case they
180 happen at once. Also, if unmount follows withdraw,
181 wait for withdraw to finish. */
182
183 if (test_bit(DFL_WITHDRAW, &ls->flags))
184 goto out;
185
186 gdlm_kobject_release(ls);
187 dlm_release_lockspace(ls->dlm_lockspace, 2);
188 gdlm_release_threads(ls);
189 BUG_ON(ls->all_locks_count);
190out:
191 kfree(ls);
192}
193
194static void gdlm_recovery_done(void *lockspace, unsigned int jid,
195 unsigned int message)
196{
197 char env_jid[20];
198 char env_status[20];
199 char *envp[] = { env_jid, env_status, NULL };
200 struct gdlm_ls *ls = lockspace;
201 ls->recover_jid_done = jid;
202 ls->recover_jid_status = message;
203 sprintf(env_jid, "JID=%d", jid);
204 sprintf(env_status, "RECOVERY=%s",
205 message == LM_RD_SUCCESS ? "Done" : "Failed");
206 kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
207}
208
209static void gdlm_others_may_mount(void *lockspace)
210{
211 char *message = "FIRSTMOUNT=Done";
212 char *envp[] = { message, NULL };
213 struct gdlm_ls *ls = lockspace;
214 ls->first_done = 1;
215 kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
216}
217
218/* Userspace gets the offline uevent, blocks new gfs locks on
219 other mounters, and lets us know (sets WITHDRAW flag). Then,
220 userspace leaves the mount group while we leave the lockspace. */
221
222static void gdlm_withdraw(void *lockspace)
223{
224 struct gdlm_ls *ls = lockspace;
225
226 kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
227
228 wait_event_interruptible(ls->wait_control,
229 test_bit(DFL_WITHDRAW, &ls->flags));
230
231 dlm_release_lockspace(ls->dlm_lockspace, 2);
232 gdlm_release_threads(ls);
233 gdlm_kobject_release(ls);
234}
235
236static int gdlm_plock(void *lockspace, struct lm_lockname *name,
237 struct file *file, int cmd, struct file_lock *fl)
238{
239 struct gdlm_ls *ls = lockspace;
240 return dlm_posix_lock(ls->dlm_lockspace, name->ln_number, file, cmd, fl);
241}
242
243static int gdlm_punlock(void *lockspace, struct lm_lockname *name,
244 struct file *file, struct file_lock *fl)
245{
246 struct gdlm_ls *ls = lockspace;
247 return dlm_posix_unlock(ls->dlm_lockspace, name->ln_number, file, fl);
248}
249
250static int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
251 struct file *file, struct file_lock *fl)
252{
253 struct gdlm_ls *ls = lockspace;
254 return dlm_posix_get(ls->dlm_lockspace, name->ln_number, file, fl);
255}
256
257const struct lm_lockops gdlm_ops = {
258 .lm_proto_name = "lock_dlm",
259 .lm_mount = gdlm_mount,
260 .lm_others_may_mount = gdlm_others_may_mount,
261 .lm_unmount = gdlm_unmount,
262 .lm_withdraw = gdlm_withdraw,
263 .lm_get_lock = gdlm_get_lock,
264 .lm_put_lock = gdlm_put_lock,
265 .lm_lock = gdlm_lock,
266 .lm_unlock = gdlm_unlock,
267 .lm_plock = gdlm_plock,
268 .lm_punlock = gdlm_punlock,
269 .lm_plock_get = gdlm_plock_get,
270 .lm_cancel = gdlm_cancel,
271 .lm_hold_lvb = gdlm_hold_lvb,
272 .lm_unhold_lvb = gdlm_unhold_lvb,
273 .lm_recovery_done = gdlm_recovery_done,
274 .lm_owner = THIS_MODULE,
275};
276
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
deleted file mode 100644
index 9b7edcf7bd49..000000000000
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ /dev/null
@@ -1,226 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/ctype.h>
11#include <linux/stat.h>
12
13#include "lock_dlm.h"
14
15static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
16{
17 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
18}
19
20static ssize_t block_show(struct gdlm_ls *ls, char *buf)
21{
22 ssize_t ret;
23 int val = 0;
24
25 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
26 val = 1;
27 ret = sprintf(buf, "%d\n", val);
28 return ret;
29}
30
31static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
32{
33 ssize_t ret = len;
34 int val;
35
36 val = simple_strtol(buf, NULL, 0);
37
38 if (val == 1)
39 set_bit(DFL_BLOCK_LOCKS, &ls->flags);
40 else if (val == 0) {
41 clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
42 gdlm_submit_delayed(ls);
43 } else {
44 ret = -EINVAL;
45 }
46 return ret;
47}
48
49static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
50{
51 ssize_t ret;
52 int val = 0;
53
54 if (test_bit(DFL_WITHDRAW, &ls->flags))
55 val = 1;
56 ret = sprintf(buf, "%d\n", val);
57 return ret;
58}
59
60static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
61{
62 ssize_t ret = len;
63 int val;
64
65 val = simple_strtol(buf, NULL, 0);
66
67 if (val == 1)
68 set_bit(DFL_WITHDRAW, &ls->flags);
69 else
70 ret = -EINVAL;
71 wake_up(&ls->wait_control);
72 return ret;
73}
74
75static ssize_t id_show(struct gdlm_ls *ls, char *buf)
76{
77 return sprintf(buf, "%u\n", ls->id);
78}
79
80static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
81{
82 return sprintf(buf, "%d\n", ls->jid);
83}
84
85static ssize_t first_show(struct gdlm_ls *ls, char *buf)
86{
87 return sprintf(buf, "%d\n", ls->first);
88}
89
90static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
91{
92 return sprintf(buf, "%d\n", ls->first_done);
93}
94
95static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
96{
97 return sprintf(buf, "%d\n", ls->recover_jid);
98}
99
100static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
101{
102 ls->recover_jid = simple_strtol(buf, NULL, 0);
103 ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid);
104 return len;
105}
106
107static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
108{
109 return sprintf(buf, "%d\n", ls->recover_jid_done);
110}
111
112static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
113{
114 return sprintf(buf, "%d\n", ls->recover_jid_status);
115}
116
117struct gdlm_attr {
118 struct attribute attr;
119 ssize_t (*show)(struct gdlm_ls *, char *);
120 ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
121};
122
123#define GDLM_ATTR(_name,_mode,_show,_store) \
124static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
125
126GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
127GDLM_ATTR(block, 0644, block_show, block_store);
128GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
129GDLM_ATTR(id, 0444, id_show, NULL);
130GDLM_ATTR(jid, 0444, jid_show, NULL);
131GDLM_ATTR(first, 0444, first_show, NULL);
132GDLM_ATTR(first_done, 0444, first_done_show, NULL);
133GDLM_ATTR(recover, 0644, recover_show, recover_store);
134GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
135GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
136
137static struct attribute *gdlm_attrs[] = {
138 &gdlm_attr_proto_name.attr,
139 &gdlm_attr_block.attr,
140 &gdlm_attr_withdraw.attr,
141 &gdlm_attr_id.attr,
142 &gdlm_attr_jid.attr,
143 &gdlm_attr_first.attr,
144 &gdlm_attr_first_done.attr,
145 &gdlm_attr_recover.attr,
146 &gdlm_attr_recover_done.attr,
147 &gdlm_attr_recover_status.attr,
148 NULL,
149};
150
151static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
152 char *buf)
153{
154 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
155 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
156 return a->show ? a->show(ls, buf) : 0;
157}
158
159static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
160 const char *buf, size_t len)
161{
162 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
163 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
164 return a->store ? a->store(ls, buf, len) : len;
165}
166
167static struct sysfs_ops gdlm_attr_ops = {
168 .show = gdlm_attr_show,
169 .store = gdlm_attr_store,
170};
171
172static struct kobj_type gdlm_ktype = {
173 .default_attrs = gdlm_attrs,
174 .sysfs_ops = &gdlm_attr_ops,
175};
176
177static struct kset *gdlm_kset;
178
179int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
180{
181 int error;
182
183 ls->kobj.kset = gdlm_kset;
184 error = kobject_init_and_add(&ls->kobj, &gdlm_ktype, fskobj,
185 "lock_module");
186 if (error)
187 log_error("can't register kobj %d", error);
188 kobject_uevent(&ls->kobj, KOBJ_ADD);
189
190 return error;
191}
192
193void gdlm_kobject_release(struct gdlm_ls *ls)
194{
195 kobject_put(&ls->kobj);
196}
197
198static int gdlm_uevent(struct kset *kset, struct kobject *kobj,
199 struct kobj_uevent_env *env)
200{
201 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
202 add_uevent_var(env, "LOCKTABLE=%s:%s", ls->clustername, ls->fsname);
203 add_uevent_var(env, "LOCKPROTO=lock_dlm");
204 return 0;
205}
206
207static struct kset_uevent_ops gdlm_uevent_ops = {
208 .uevent = gdlm_uevent,
209};
210
211
212int gdlm_sysfs_init(void)
213{
214 gdlm_kset = kset_create_and_add("lock_dlm", &gdlm_uevent_ops, kernel_kobj);
215 if (!gdlm_kset) {
216 printk(KERN_WARNING "%s: can not create kset\n", __func__);
217 return -ENOMEM;
218 }
219 return 0;
220}
221
222void gdlm_sysfs_exit(void)
223{
224 kset_unregister(gdlm_kset);
225}
226
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
deleted file mode 100644
index 38823efd698c..000000000000
--- a/fs/gfs2/locking/dlm/thread.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include "lock_dlm.h"
11
12static inline int no_work(struct gdlm_ls *ls)
13{
14 int ret;
15
16 spin_lock(&ls->async_lock);
17 ret = list_empty(&ls->submit);
18 spin_unlock(&ls->async_lock);
19
20 return ret;
21}
22
23static int gdlm_thread(void *data)
24{
25 struct gdlm_ls *ls = (struct gdlm_ls *) data;
26 struct gdlm_lock *lp = NULL;
27
28 while (!kthread_should_stop()) {
29 wait_event_interruptible(ls->thread_wait,
30 !no_work(ls) || kthread_should_stop());
31
32 spin_lock(&ls->async_lock);
33
34 if (!list_empty(&ls->submit)) {
35 lp = list_entry(ls->submit.next, struct gdlm_lock,
36 delay_list);
37 list_del_init(&lp->delay_list);
38 spin_unlock(&ls->async_lock);
39 gdlm_do_lock(lp);
40 spin_lock(&ls->async_lock);
41 }
42 spin_unlock(&ls->async_lock);
43 }
44
45 return 0;
46}
47
48int gdlm_init_threads(struct gdlm_ls *ls)
49{
50 struct task_struct *p;
51 int error;
52
53 p = kthread_run(gdlm_thread, ls, "lock_dlm");
54 error = IS_ERR(p);
55 if (error) {
56 log_error("can't start lock_dlm thread %d", error);
57 return error;
58 }
59 ls->thread = p;
60
61 return 0;
62}
63
64void gdlm_release_threads(struct gdlm_ls *ls)
65{
66 kthread_stop(ls->thread);
67}
68
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index ad305854bdc6..98918a756410 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -14,7 +14,6 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18#include <linux/delay.h> 17#include <linux/delay.h>
19#include <linux/kthread.h> 18#include <linux/kthread.h>
20#include <linux/freezer.h> 19#include <linux/freezer.h>
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4390f6f4047d..80e4f5f898bb 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -13,7 +13,6 @@
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7cacfde32194..a6892ed0840a 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,7 +14,6 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/lm_interface.h>
18#include <asm/atomic.h> 17#include <asm/atomic.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
@@ -23,6 +22,12 @@
23#include "sys.h" 22#include "sys.h"
24#include "util.h" 23#include "util.h"
25#include "glock.h" 24#include "glock.h"
25#include "quota.h"
26
27static struct shrinker qd_shrinker = {
28 .shrink = gfs2_shrink_qd_memory,
29 .seeks = DEFAULT_SEEKS,
30};
26 31
27static void gfs2_init_inode_once(void *foo) 32static void gfs2_init_inode_once(void *foo)
28{ 33{
@@ -41,8 +46,6 @@ static void gfs2_init_glock_once(void *foo)
41 INIT_HLIST_NODE(&gl->gl_list); 46 INIT_HLIST_NODE(&gl->gl_list);
42 spin_lock_init(&gl->gl_spin); 47 spin_lock_init(&gl->gl_spin);
43 INIT_LIST_HEAD(&gl->gl_holders); 48 INIT_LIST_HEAD(&gl->gl_holders);
44 gl->gl_lvb = NULL;
45 atomic_set(&gl->gl_lvb_count, 0);
46 INIT_LIST_HEAD(&gl->gl_lru); 49 INIT_LIST_HEAD(&gl->gl_lru);
47 INIT_LIST_HEAD(&gl->gl_ail_list); 50 INIT_LIST_HEAD(&gl->gl_ail_list);
48 atomic_set(&gl->gl_ail_count, 0); 51 atomic_set(&gl->gl_ail_count, 0);
@@ -100,6 +103,8 @@ static int __init init_gfs2_fs(void)
100 if (!gfs2_quotad_cachep) 103 if (!gfs2_quotad_cachep)
101 goto fail; 104 goto fail;
102 105
106 register_shrinker(&qd_shrinker);
107
103 error = register_filesystem(&gfs2_fs_type); 108 error = register_filesystem(&gfs2_fs_type);
104 if (error) 109 if (error)
105 goto fail; 110 goto fail;
@@ -117,6 +122,7 @@ static int __init init_gfs2_fs(void)
117fail_unregister: 122fail_unregister:
118 unregister_filesystem(&gfs2_fs_type); 123 unregister_filesystem(&gfs2_fs_type);
119fail: 124fail:
125 unregister_shrinker(&qd_shrinker);
120 gfs2_glock_exit(); 126 gfs2_glock_exit();
121 127
122 if (gfs2_quotad_cachep) 128 if (gfs2_quotad_cachep)
@@ -145,6 +151,7 @@ fail:
145 151
146static void __exit exit_gfs2_fs(void) 152static void __exit exit_gfs2_fs(void)
147{ 153{
154 unregister_shrinker(&qd_shrinker);
148 gfs2_glock_exit(); 155 gfs2_glock_exit();
149 gfs2_unregister_debugfs(); 156 gfs2_unregister_debugfs();
150 unregister_filesystem(&gfs2_fs_type); 157 unregister_filesystem(&gfs2_fs_type);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 09853620c951..8d6f13256b26 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -19,7 +19,6 @@
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h>
23 22
24#include "gfs2.h" 23#include "gfs2.h"
25#include "incore.h" 24#include "incore.h"
@@ -90,27 +89,6 @@ void gfs2_aspace_put(struct inode *aspace)
90} 89}
91 90
92/** 91/**
93 * gfs2_meta_inval - Invalidate all buffers associated with a glock
94 * @gl: the glock
95 *
96 */
97
98void gfs2_meta_inval(struct gfs2_glock *gl)
99{
100 struct gfs2_sbd *sdp = gl->gl_sbd;
101 struct inode *aspace = gl->gl_aspace;
102 struct address_space *mapping = gl->gl_aspace->i_mapping;
103
104 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
105
106 atomic_inc(&aspace->i_writecount);
107 truncate_inode_pages(mapping, 0);
108 atomic_dec(&aspace->i_writecount);
109
110 gfs2_assert_withdraw(sdp, !mapping->nrpages);
111}
112
113/**
114 * gfs2_meta_sync - Sync all buffers associated with a glock 92 * gfs2_meta_sync - Sync all buffers associated with a glock
115 * @gl: The glock 93 * @gl: The glock
116 * 94 *
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index b1a5f3674d43..de270c2f9b63 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -40,7 +40,6 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
40struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); 40struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
41void gfs2_aspace_put(struct inode *aspace); 41void gfs2_aspace_put(struct inode *aspace);
42 42
43void gfs2_meta_inval(struct gfs2_glock *gl);
44void gfs2_meta_sync(struct gfs2_glock *gl); 43void gfs2_meta_sync(struct gfs2_glock *gl);
45 44
46struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); 45struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 3cb0a44ba023..f7e8527a21e0 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -12,12 +12,11 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/lm_interface.h>
16#include <linux/parser.h> 15#include <linux/parser.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
20#include "mount.h" 19#include "super.h"
21#include "sys.h" 20#include "sys.h"
22#include "util.h" 21#include "util.h"
23 22
@@ -37,11 +36,15 @@ enum {
37 Opt_quota_off, 36 Opt_quota_off,
38 Opt_quota_account, 37 Opt_quota_account,
39 Opt_quota_on, 38 Opt_quota_on,
39 Opt_quota,
40 Opt_noquota,
40 Opt_suiddir, 41 Opt_suiddir,
41 Opt_nosuiddir, 42 Opt_nosuiddir,
42 Opt_data_writeback, 43 Opt_data_writeback,
43 Opt_data_ordered, 44 Opt_data_ordered,
44 Opt_meta, 45 Opt_meta,
46 Opt_discard,
47 Opt_nodiscard,
45 Opt_err, 48 Opt_err,
46}; 49};
47 50
@@ -61,11 +64,15 @@ static const match_table_t tokens = {
61 {Opt_quota_off, "quota=off"}, 64 {Opt_quota_off, "quota=off"},
62 {Opt_quota_account, "quota=account"}, 65 {Opt_quota_account, "quota=account"},
63 {Opt_quota_on, "quota=on"}, 66 {Opt_quota_on, "quota=on"},
67 {Opt_quota, "quota"},
68 {Opt_noquota, "noquota"},
64 {Opt_suiddir, "suiddir"}, 69 {Opt_suiddir, "suiddir"},
65 {Opt_nosuiddir, "nosuiddir"}, 70 {Opt_nosuiddir, "nosuiddir"},
66 {Opt_data_writeback, "data=writeback"}, 71 {Opt_data_writeback, "data=writeback"},
67 {Opt_data_ordered, "data=ordered"}, 72 {Opt_data_ordered, "data=ordered"},
68 {Opt_meta, "meta"}, 73 {Opt_meta, "meta"},
74 {Opt_discard, "discard"},
75 {Opt_nodiscard, "nodiscard"},
69 {Opt_err, NULL} 76 {Opt_err, NULL}
70}; 77};
71 78
@@ -77,101 +84,46 @@ static const match_table_t tokens = {
77 * Return: errno 84 * Return: errno
78 */ 85 */
79 86
80int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) 87int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
81{ 88{
82 struct gfs2_args *args = &sdp->sd_args; 89 char *o;
83 char *data = data_arg; 90 int token;
84 char *options, *o, *v; 91 substring_t tmp[MAX_OPT_ARGS];
85 int error = 0;
86
87 if (!remount) {
88 /* Set some defaults */
89 args->ar_quota = GFS2_QUOTA_DEFAULT;
90 args->ar_data = GFS2_DATA_DEFAULT;
91 }
92 92
93 /* Split the options into tokens with the "," character and 93 /* Split the options into tokens with the "," character and
94 process them */ 94 process them */
95 95
96 for (options = data; (o = strsep(&options, ",")); ) { 96 while (1) {
97 int token; 97 o = strsep(&options, ",");
98 substring_t tmp[MAX_OPT_ARGS]; 98 if (o == NULL)
99 99 break;
100 if (!*o) 100 if (*o == '\0')
101 continue; 101 continue;
102 102
103 token = match_token(o, tokens, tmp); 103 token = match_token(o, tokens, tmp);
104 switch (token) { 104 switch (token) {
105 case Opt_lockproto: 105 case Opt_lockproto:
106 v = match_strdup(&tmp[0]); 106 match_strlcpy(args->ar_lockproto, &tmp[0],
107 if (!v) { 107 GFS2_LOCKNAME_LEN);
108 fs_info(sdp, "no memory for lockproto\n");
109 error = -ENOMEM;
110 goto out_error;
111 }
112
113 if (remount && strcmp(v, args->ar_lockproto)) {
114 kfree(v);
115 goto cant_remount;
116 }
117
118 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
119 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
120 kfree(v);
121 break; 108 break;
122 case Opt_locktable: 109 case Opt_locktable:
123 v = match_strdup(&tmp[0]); 110 match_strlcpy(args->ar_locktable, &tmp[0],
124 if (!v) { 111 GFS2_LOCKNAME_LEN);
125 fs_info(sdp, "no memory for locktable\n");
126 error = -ENOMEM;
127 goto out_error;
128 }
129
130 if (remount && strcmp(v, args->ar_locktable)) {
131 kfree(v);
132 goto cant_remount;
133 }
134
135 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
136 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
137 kfree(v);
138 break; 112 break;
139 case Opt_hostdata: 113 case Opt_hostdata:
140 v = match_strdup(&tmp[0]); 114 match_strlcpy(args->ar_hostdata, &tmp[0],
141 if (!v) { 115 GFS2_LOCKNAME_LEN);
142 fs_info(sdp, "no memory for hostdata\n");
143 error = -ENOMEM;
144 goto out_error;
145 }
146
147 if (remount && strcmp(v, args->ar_hostdata)) {
148 kfree(v);
149 goto cant_remount;
150 }
151
152 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
153 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
154 kfree(v);
155 break; 116 break;
156 case Opt_spectator: 117 case Opt_spectator:
157 if (remount && !args->ar_spectator)
158 goto cant_remount;
159 args->ar_spectator = 1; 118 args->ar_spectator = 1;
160 sdp->sd_vfs->s_flags |= MS_RDONLY;
161 break; 119 break;
162 case Opt_ignore_local_fs: 120 case Opt_ignore_local_fs:
163 if (remount && !args->ar_ignore_local_fs)
164 goto cant_remount;
165 args->ar_ignore_local_fs = 1; 121 args->ar_ignore_local_fs = 1;
166 break; 122 break;
167 case Opt_localflocks: 123 case Opt_localflocks:
168 if (remount && !args->ar_localflocks)
169 goto cant_remount;
170 args->ar_localflocks = 1; 124 args->ar_localflocks = 1;
171 break; 125 break;
172 case Opt_localcaching: 126 case Opt_localcaching:
173 if (remount && !args->ar_localcaching)
174 goto cant_remount;
175 args->ar_localcaching = 1; 127 args->ar_localcaching = 1;
176 break; 128 break;
177 case Opt_debug: 129 case Opt_debug:
@@ -181,25 +133,23 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
181 args->ar_debug = 0; 133 args->ar_debug = 0;
182 break; 134 break;
183 case Opt_upgrade: 135 case Opt_upgrade:
184 if (remount && !args->ar_upgrade)
185 goto cant_remount;
186 args->ar_upgrade = 1; 136 args->ar_upgrade = 1;
187 break; 137 break;
188 case Opt_acl: 138 case Opt_acl:
189 args->ar_posix_acl = 1; 139 args->ar_posix_acl = 1;
190 sdp->sd_vfs->s_flags |= MS_POSIXACL;
191 break; 140 break;
192 case Opt_noacl: 141 case Opt_noacl:
193 args->ar_posix_acl = 0; 142 args->ar_posix_acl = 0;
194 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
195 break; 143 break;
196 case Opt_quota_off: 144 case Opt_quota_off:
145 case Opt_noquota:
197 args->ar_quota = GFS2_QUOTA_OFF; 146 args->ar_quota = GFS2_QUOTA_OFF;
198 break; 147 break;
199 case Opt_quota_account: 148 case Opt_quota_account:
200 args->ar_quota = GFS2_QUOTA_ACCOUNT; 149 args->ar_quota = GFS2_QUOTA_ACCOUNT;
201 break; 150 break;
202 case Opt_quota_on: 151 case Opt_quota_on:
152 case Opt_quota:
203 args->ar_quota = GFS2_QUOTA_ON; 153 args->ar_quota = GFS2_QUOTA_ON;
204 break; 154 break;
205 case Opt_suiddir: 155 case Opt_suiddir:
@@ -215,29 +165,21 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
215 args->ar_data = GFS2_DATA_ORDERED; 165 args->ar_data = GFS2_DATA_ORDERED;
216 break; 166 break;
217 case Opt_meta: 167 case Opt_meta:
218 if (remount && args->ar_meta != 1)
219 goto cant_remount;
220 args->ar_meta = 1; 168 args->ar_meta = 1;
221 break; 169 break;
170 case Opt_discard:
171 args->ar_discard = 1;
172 break;
173 case Opt_nodiscard:
174 args->ar_discard = 0;
175 break;
222 case Opt_err: 176 case Opt_err:
223 default: 177 default:
224 fs_info(sdp, "unknown option: %s\n", o); 178 fs_info(sdp, "invalid mount option: %s\n", o);
225 error = -EINVAL; 179 return -EINVAL;
226 goto out_error;
227 } 180 }
228 } 181 }
229 182
230out_error: 183 return 0;
231 if (error)
232 fs_info(sdp, "invalid mount option(s)\n");
233
234 if (data != data_arg)
235 kfree(data);
236
237 return error;
238
239cant_remount:
240 fs_info(sdp, "can't remount with option %s\n", o);
241 return -EINVAL;
242} 184}
243 185
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
deleted file mode 100644
index 401288acfdf3..000000000000
--- a/fs/gfs2/mount.h
+++ /dev/null
@@ -1,17 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
13struct gfs2_sbd;
14
15int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
16
17#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 4ddab67867eb..a6dde1751e17 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -19,7 +19,6 @@
19#include <linux/writeback.h> 19#include <linux/writeback.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/lm_interface.h>
23#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
24 23
25#include "gfs2.h" 24#include "gfs2.h"
@@ -442,6 +441,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
442 */ 441 */
443 if (unlikely(page->index)) { 442 if (unlikely(page->index)) {
444 zero_user(page, 0, PAGE_CACHE_SIZE); 443 zero_user(page, 0, PAGE_CACHE_SIZE);
444 SetPageUptodate(page);
445 return 0; 445 return 0;
446 } 446 }
447 447
@@ -1096,6 +1096,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1096 .releasepage = gfs2_releasepage, 1096 .releasepage = gfs2_releasepage,
1097 .direct_IO = gfs2_direct_IO, 1097 .direct_IO = gfs2_direct_IO,
1098 .migratepage = buffer_migrate_page, 1098 .migratepage = buffer_migrate_page,
1099 .is_partially_uptodate = block_is_partially_uptodate,
1099}; 1100};
1100 1101
1101static const struct address_space_operations gfs2_ordered_aops = { 1102static const struct address_space_operations gfs2_ordered_aops = {
@@ -1111,6 +1112,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
1111 .releasepage = gfs2_releasepage, 1112 .releasepage = gfs2_releasepage,
1112 .direct_IO = gfs2_direct_IO, 1113 .direct_IO = gfs2_direct_IO,
1113 .migratepage = buffer_migrate_page, 1114 .migratepage = buffer_migrate_page,
1115 .is_partially_uptodate = block_is_partially_uptodate,
1114}; 1116};
1115 1117
1116static const struct address_space_operations gfs2_jdata_aops = { 1118static const struct address_space_operations gfs2_jdata_aops = {
@@ -1125,6 +1127,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
1125 .bmap = gfs2_bmap, 1127 .bmap = gfs2_bmap,
1126 .invalidatepage = gfs2_invalidatepage, 1128 .invalidatepage = gfs2_invalidatepage,
1127 .releasepage = gfs2_releasepage, 1129 .releasepage = gfs2_releasepage,
1130 .is_partially_uptodate = block_is_partially_uptodate,
1128}; 1131};
1129 1132
1130void gfs2_set_aops(struct inode *inode) 1133void gfs2_set_aops(struct inode *inode)
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index c2ad36330ca3..5eb57b044382 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17 16
18#include "gfs2.h" 17#include "gfs2.h"
19#include "incore.h" 18#include "incore.h"
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 7fdeb14ddd1a..9200ef221716 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -14,7 +14,6 @@
14#include <linux/exportfs.h> 14#include <linux/exportfs.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h> 16#include <linux/crc32.h>
17#include <linux/lm_interface.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
20#include "incore.h" 19#include "incore.h"
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 93fe41b67f97..3b9e8de3500b 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -20,9 +20,10 @@
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/ext2_fs.h> 21#include <linux/ext2_fs.h>
22#include <linux/crc32.h> 22#include <linux/crc32.h>
23#include <linux/lm_interface.h>
24#include <linux/writeback.h> 23#include <linux/writeback.h>
25#include <asm/uaccess.h> 24#include <asm/uaccess.h>
25#include <linux/dlm.h>
26#include <linux/dlm_plock.h>
26 27
27#include "gfs2.h" 28#include "gfs2.h"
28#include "incore.h" 29#include "incore.h"
@@ -354,7 +355,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
354 if (ret) 355 if (ret)
355 goto out; 356 goto out;
356 357
358 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
357 set_bit(GIF_SW_PAGED, &ip->i_flags); 359 set_bit(GIF_SW_PAGED, &ip->i_flags);
360
358 ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); 361 ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
359 if (ret || !alloc_required) 362 if (ret || !alloc_required)
360 goto out_unlock; 363 goto out_unlock;
@@ -560,57 +563,24 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
560 return ret; 563 return ret;
561} 564}
562 565
566#ifdef CONFIG_GFS2_FS_LOCKING_DLM
567
563/** 568/**
564 * gfs2_setlease - acquire/release a file lease 569 * gfs2_setlease - acquire/release a file lease
565 * @file: the file pointer 570 * @file: the file pointer
566 * @arg: lease type 571 * @arg: lease type
567 * @fl: file lock 572 * @fl: file lock
568 * 573 *
574 * We don't currently have a way to enforce a lease across the whole
575 * cluster; until we do, disable leases (by just returning -EINVAL),
576 * unless the administrator has requested purely local locking.
577 *
569 * Returns: errno 578 * Returns: errno
570 */ 579 */
571 580
572static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) 581static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
573{ 582{
574 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 583 return -EINVAL;
575
576 /*
577 * We don't currently have a way to enforce a lease across the whole
578 * cluster; until we do, disable leases (by just returning -EINVAL),
579 * unless the administrator has requested purely local locking.
580 */
581 if (!sdp->sd_args.ar_localflocks)
582 return -EINVAL;
583 return generic_setlease(file, arg, fl);
584}
585
586static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
587 struct file *file, struct file_lock *fl)
588{
589 int error = -EIO;
590 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
591 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
592 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
593 return error;
594}
595
596static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
597 struct file *file, int cmd, struct file_lock *fl)
598{
599 int error = -EIO;
600 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
601 error = sdp->sd_lockstruct.ls_ops->lm_plock(
602 sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
603 return error;
604}
605
606static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
607 struct file *file, struct file_lock *fl)
608{
609 int error = -EIO;
610 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
611 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
612 sdp->sd_lockstruct.ls_lockspace, name, file, fl);
613 return error;
614} 584}
615 585
616/** 586/**
@@ -626,9 +596,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
626{ 596{
627 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 597 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
628 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); 598 struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
629 struct lm_lockname name = 599 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
630 { .ln_number = ip->i_no_addr,
631 .ln_type = LM_TYPE_PLOCK };
632 600
633 if (!(fl->fl_flags & FL_POSIX)) 601 if (!(fl->fl_flags & FL_POSIX))
634 return -ENOLCK; 602 return -ENOLCK;
@@ -640,12 +608,14 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
640 cmd = F_SETLK; 608 cmd = F_SETLK;
641 fl->fl_type = F_UNLCK; 609 fl->fl_type = F_UNLCK;
642 } 610 }
611 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
612 return -EIO;
643 if (IS_GETLK(cmd)) 613 if (IS_GETLK(cmd))
644 return gfs2_lm_plock_get(sdp, &name, file, fl); 614 return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
645 else if (fl->fl_type == F_UNLCK) 615 else if (fl->fl_type == F_UNLCK)
646 return gfs2_lm_punlock(sdp, &name, file, fl); 616 return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
647 else 617 else
648 return gfs2_lm_plock(sdp, &name, file, cmd, fl); 618 return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
649} 619}
650 620
651static int do_flock(struct file *file, int cmd, struct file_lock *fl) 621static int do_flock(struct file *file, int cmd, struct file_lock *fl)
@@ -732,7 +702,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
732 } 702 }
733} 703}
734 704
735const struct file_operations gfs2_file_fops = { 705const struct file_operations *gfs2_file_fops = &(const struct file_operations){
736 .llseek = gfs2_llseek, 706 .llseek = gfs2_llseek,
737 .read = do_sync_read, 707 .read = do_sync_read,
738 .aio_read = generic_file_aio_read, 708 .aio_read = generic_file_aio_read,
@@ -750,7 +720,7 @@ const struct file_operations gfs2_file_fops = {
750 .setlease = gfs2_setlease, 720 .setlease = gfs2_setlease,
751}; 721};
752 722
753const struct file_operations gfs2_dir_fops = { 723const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
754 .readdir = gfs2_readdir, 724 .readdir = gfs2_readdir,
755 .unlocked_ioctl = gfs2_ioctl, 725 .unlocked_ioctl = gfs2_ioctl,
756 .open = gfs2_open, 726 .open = gfs2_open,
@@ -760,7 +730,9 @@ const struct file_operations gfs2_dir_fops = {
760 .flock = gfs2_flock, 730 .flock = gfs2_flock,
761}; 731};
762 732
763const struct file_operations gfs2_file_fops_nolock = { 733#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
734
735const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){
764 .llseek = gfs2_llseek, 736 .llseek = gfs2_llseek,
765 .read = do_sync_read, 737 .read = do_sync_read,
766 .aio_read = generic_file_aio_read, 738 .aio_read = generic_file_aio_read,
@@ -773,10 +745,10 @@ const struct file_operations gfs2_file_fops_nolock = {
773 .fsync = gfs2_fsync, 745 .fsync = gfs2_fsync,
774 .splice_read = generic_file_splice_read, 746 .splice_read = generic_file_splice_read,
775 .splice_write = generic_file_splice_write, 747 .splice_write = generic_file_splice_write,
776 .setlease = gfs2_setlease, 748 .setlease = generic_setlease,
777}; 749};
778 750
779const struct file_operations gfs2_dir_fops_nolock = { 751const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){
780 .readdir = gfs2_readdir, 752 .readdir = gfs2_readdir,
781 .unlocked_ioctl = gfs2_ioctl, 753 .unlocked_ioctl = gfs2_ioctl,
782 .open = gfs2_open, 754 .open = gfs2_open,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f91eebdde581..51883b3ad89c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,7 +17,6 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/lm_interface.h>
21 20
22#include "gfs2.h" 21#include "gfs2.h"
23#include "incore.h" 22#include "incore.h"
@@ -25,7 +24,6 @@
25#include "glock.h" 24#include "glock.h"
26#include "glops.h" 25#include "glops.h"
27#include "inode.h" 26#include "inode.h"
28#include "mount.h"
29#include "recovery.h" 27#include "recovery.h"
30#include "rgrp.h" 28#include "rgrp.h"
31#include "super.h" 29#include "super.h"
@@ -64,7 +62,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
64 gt->gt_quota_warn_period = 10; 62 gt->gt_quota_warn_period = 10;
65 gt->gt_quota_scale_num = 1; 63 gt->gt_quota_scale_num = 1;
66 gt->gt_quota_scale_den = 1; 64 gt->gt_quota_scale_den = 1;
67 gt->gt_quota_cache_secs = 300;
68 gt->gt_quota_quantum = 60; 65 gt->gt_quota_quantum = 60;
69 gt->gt_new_files_jdata = 0; 66 gt->gt_new_files_jdata = 0;
70 gt->gt_max_readahead = 1 << 18; 67 gt->gt_max_readahead = 1 << 18;
@@ -100,7 +97,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
100 mutex_init(&sdp->sd_jindex_mutex); 97 mutex_init(&sdp->sd_jindex_mutex);
101 98
102 INIT_LIST_HEAD(&sdp->sd_quota_list); 99 INIT_LIST_HEAD(&sdp->sd_quota_list);
103 spin_lock_init(&sdp->sd_quota_spin);
104 mutex_init(&sdp->sd_quota_mutex); 100 mutex_init(&sdp->sd_quota_mutex);
105 init_waitqueue_head(&sdp->sd_quota_wait); 101 init_waitqueue_head(&sdp->sd_quota_wait);
106 INIT_LIST_HEAD(&sdp->sd_trunc_list); 102 INIT_LIST_HEAD(&sdp->sd_trunc_list);
@@ -238,6 +234,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
238 234
239 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); 235 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
240 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); 236 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
237 memcpy(sb->sb_uuid, str->sb_uuid, 16);
241} 238}
242 239
243/** 240/**
@@ -299,15 +296,15 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
299 __free_page(page); 296 __free_page(page);
300 return 0; 297 return 0;
301} 298}
299
302/** 300/**
303 * gfs2_read_sb - Read super block 301 * gfs2_read_sb - Read super block
304 * @sdp: The GFS2 superblock 302 * @sdp: The GFS2 superblock
305 * @gl: the glock for the superblock (assumed to be held)
306 * @silent: Don't print message if mount fails 303 * @silent: Don't print message if mount fails
307 * 304 *
308 */ 305 */
309 306
310static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) 307static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
311{ 308{
312 u32 hash_blocks, ind_blocks, leaf_blocks; 309 u32 hash_blocks, ind_blocks, leaf_blocks;
313 u32 tmp_blocks; 310 u32 tmp_blocks;
@@ -527,7 +524,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
527 return ret; 524 return ret;
528 } 525 }
529 526
530 ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); 527 ret = gfs2_read_sb(sdp, silent);
531 if (ret) { 528 if (ret) {
532 fs_err(sdp, "can't read superblock: %d\n", ret); 529 fs_err(sdp, "can't read superblock: %d\n", ret);
533 goto out; 530 goto out;
@@ -630,13 +627,13 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
630 return rc; 627 return rc;
631} 628}
632 629
633static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) 630static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
634{ 631{
635 if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount) 632 char *message = "FIRSTMOUNT=Done";
636 return; 633 char *envp[] = { message, NULL };
637 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 634 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
638 sdp->sd_lockstruct.ls_ops->lm_others_may_mount( 635 ls->ls_first_done = 1;
639 sdp->sd_lockstruct.ls_lockspace); 636 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
640} 637}
641 638
642/** 639/**
@@ -796,7 +793,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
796 } 793 }
797 } 794 }
798 795
799 gfs2_lm_others_may_mount(sdp); 796 gfs2_others_may_mount(sdp);
800 } else if (!sdp->sd_args.ar_spectator) { 797 } else if (!sdp->sd_args.ar_spectator) {
801 error = gfs2_recover_journal(sdp->sd_jdesc); 798 error = gfs2_recover_journal(sdp->sd_jdesc);
802 if (error) { 799 if (error) {
@@ -1005,7 +1002,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
1005 goto fail_quotad; 1002 goto fail_quotad;
1006 1003
1007 sdp->sd_log_flush_time = jiffies; 1004 sdp->sd_log_flush_time = jiffies;
1008 sdp->sd_jindex_refresh_time = jiffies;
1009 1005
1010 p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); 1006 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
1011 error = IS_ERR(p); 1007 error = IS_ERR(p);
@@ -1033,6 +1029,17 @@ fail:
1033 return error; 1029 return error;
1034} 1030}
1035 1031
1032static const match_table_t nolock_tokens = {
1033 { Opt_jid, "jid=%d\n", },
1034 { Opt_err, NULL },
1035};
1036
1037static const struct lm_lockops nolock_ops = {
1038 .lm_proto_name = "lock_nolock",
1039 .lm_put_lock = kmem_cache_free,
1040 .lm_tokens = &nolock_tokens,
1041};
1042
1036/** 1043/**
1037 * gfs2_lm_mount - mount a locking protocol 1044 * gfs2_lm_mount - mount a locking protocol
1038 * @sdp: the filesystem 1045 * @sdp: the filesystem
@@ -1044,31 +1051,73 @@ fail:
1044 1051
1045static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) 1052static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1046{ 1053{
1047 char *proto = sdp->sd_proto_name; 1054 const struct lm_lockops *lm;
1048 char *table = sdp->sd_table_name; 1055 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
1049 int flags = LM_MFLAG_CONV_NODROP; 1056 struct gfs2_args *args = &sdp->sd_args;
1050 int error; 1057 const char *proto = sdp->sd_proto_name;
1058 const char *table = sdp->sd_table_name;
1059 const char *fsname;
1060 char *o, *options;
1061 int ret;
1051 1062
1052 if (sdp->sd_args.ar_spectator) 1063 if (!strcmp("lock_nolock", proto)) {
1053 flags |= LM_MFLAG_SPECTATOR; 1064 lm = &nolock_ops;
1065 sdp->sd_args.ar_localflocks = 1;
1066 sdp->sd_args.ar_localcaching = 1;
1067#ifdef CONFIG_GFS2_FS_LOCKING_DLM
1068 } else if (!strcmp("lock_dlm", proto)) {
1069 lm = &gfs2_dlm_ops;
1070#endif
1071 } else {
1072 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto);
1073 return -ENOENT;
1074 }
1054 1075
1055 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table); 1076 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
1056 1077
1057 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata, 1078 ls->ls_ops = lm;
1058 gfs2_glock_cb, sdp, 1079 ls->ls_first = 1;
1059 GFS2_MIN_LVB_SIZE, flags, 1080 ls->ls_id = 0;
1060 &sdp->sd_lockstruct, &sdp->sd_kobj);
1061 if (error) {
1062 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
1063 proto, table, sdp->sd_args.ar_hostdata);
1064 goto out;
1065 }
1066 1081
1067 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || 1082 for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
1068 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= 1083 substring_t tmp[MAX_OPT_ARGS];
1069 GFS2_MIN_LVB_SIZE)) { 1084 int token, option;
1070 gfs2_unmount_lockproto(&sdp->sd_lockstruct); 1085
1071 goto out; 1086 if (!o || !*o)
1087 continue;
1088
1089 token = match_token(o, *lm->lm_tokens, tmp);
1090 switch (token) {
1091 case Opt_jid:
1092 ret = match_int(&tmp[0], &option);
1093 if (ret || option < 0)
1094 goto hostdata_error;
1095 ls->ls_jid = option;
1096 break;
1097 case Opt_id:
1098 ret = match_int(&tmp[0], &option);
1099 if (ret)
1100 goto hostdata_error;
1101 ls->ls_id = option;
1102 break;
1103 case Opt_first:
1104 ret = match_int(&tmp[0], &option);
1105 if (ret || (option != 0 && option != 1))
1106 goto hostdata_error;
1107 ls->ls_first = option;
1108 break;
1109 case Opt_nodir:
1110 ret = match_int(&tmp[0], &option);
1111 if (ret || (option != 0 && option != 1))
1112 goto hostdata_error;
1113 ls->ls_nodir = option;
1114 break;
1115 case Opt_err:
1116 default:
1117hostdata_error:
1118 fs_info(sdp, "unknown hostdata (%s)\n", o);
1119 return -EINVAL;
1120 }
1072 } 1121 }
1073 1122
1074 if (sdp->sd_args.ar_spectator) 1123 if (sdp->sd_args.ar_spectator)
@@ -1077,22 +1126,25 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1077 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, 1126 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
1078 sdp->sd_lockstruct.ls_jid); 1127 sdp->sd_lockstruct.ls_jid);
1079 1128
1080 fs_info(sdp, "Joined cluster. Now mounting FS...\n"); 1129 fsname = strchr(table, ':');
1081 1130 if (fsname)
1082 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) && 1131 fsname++;
1083 !sdp->sd_args.ar_ignore_local_fs) { 1132 if (lm->lm_mount == NULL) {
1084 sdp->sd_args.ar_localflocks = 1; 1133 fs_info(sdp, "Now mounting FS...\n");
1085 sdp->sd_args.ar_localcaching = 1; 1134 return 0;
1086 } 1135 }
1087 1136 ret = lm->lm_mount(sdp, fsname);
1088out: 1137 if (ret == 0)
1089 return error; 1138 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
1139 return ret;
1090} 1140}
1091 1141
1092void gfs2_lm_unmount(struct gfs2_sbd *sdp) 1142void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1093{ 1143{
1094 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1144 const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
1095 gfs2_unmount_lockproto(&sdp->sd_lockstruct); 1145 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
1146 lm->lm_unmount)
1147 lm->lm_unmount(sdp);
1096} 1148}
1097 1149
1098/** 1150/**
@@ -1116,12 +1168,20 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1116 return -ENOMEM; 1168 return -ENOMEM;
1117 } 1169 }
1118 1170
1119 error = gfs2_mount_args(sdp, (char *)data, 0); 1171 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
1172 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
1173
1174 error = gfs2_mount_args(sdp, &sdp->sd_args, data);
1120 if (error) { 1175 if (error) {
1121 printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); 1176 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1122 goto fail; 1177 goto fail;
1123 } 1178 }
1124 1179
1180 if (sdp->sd_args.ar_spectator)
1181 sb->s_flags |= MS_RDONLY;
1182 if (sdp->sd_args.ar_posix_acl)
1183 sb->s_flags |= MS_POSIXACL;
1184
1125 sb->s_magic = GFS2_MAGIC; 1185 sb->s_magic = GFS2_MAGIC;
1126 sb->s_op = &gfs2_super_ops; 1186 sb->s_op = &gfs2_super_ops;
1127 sb->s_export_op = &gfs2_export_ops; 1187 sb->s_export_op = &gfs2_export_ops;
@@ -1199,6 +1259,8 @@ fail_sb:
1199 dput(sdp->sd_root_dir); 1259 dput(sdp->sd_root_dir);
1200 if (sdp->sd_master_dir) 1260 if (sdp->sd_master_dir)
1201 dput(sdp->sd_master_dir); 1261 dput(sdp->sd_master_dir);
1262 if (sb->s_root)
1263 dput(sb->s_root);
1202 sb->s_root = NULL; 1264 sb->s_root = NULL;
1203fail_locking: 1265fail_locking:
1204 init_locking(sdp, &mount_gh, UNDO); 1266 init_locking(sdp, &mount_gh, UNDO);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 49877546beb9..abd5429ae285 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,7 +18,6 @@
18#include <linux/posix_acl.h> 18#include <linux/posix_acl.h>
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/crc32.h> 20#include <linux/crc32.h>
21#include <linux/lm_interface.h>
22#include <linux/fiemap.h> 21#include <linux/fiemap.h>
23#include <asm/uaccess.h> 22#include <asm/uaccess.h>
24 23
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 320323d03479..458019569dcb 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -19,7 +19,6 @@
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h> 21#include <linux/crc32.h>
22#include <linux/lm_interface.h>
23#include <linux/time.h> 22#include <linux/time.h>
24 23
25#include "gfs2.h" 24#include "gfs2.h"
@@ -27,7 +26,6 @@
27#include "glock.h" 26#include "glock.h"
28#include "inode.h" 27#include "inode.h"
29#include "log.h" 28#include "log.h"
30#include "mount.h"
31#include "quota.h" 29#include "quota.h"
32#include "recovery.h" 30#include "recovery.h"
33#include "rgrp.h" 31#include "rgrp.h"
@@ -40,6 +38,8 @@
40#include "bmap.h" 38#include "bmap.h"
41#include "meta_io.h" 39#include "meta_io.h"
42 40
41#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
42
43/** 43/**
44 * gfs2_write_inode - Make sure the inode is stable on the disk 44 * gfs2_write_inode - Make sure the inode is stable on the disk
45 * @inode: The inode 45 * @inode: The inode
@@ -435,25 +435,45 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
435static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) 435static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
436{ 436{
437 struct gfs2_sbd *sdp = sb->s_fs_info; 437 struct gfs2_sbd *sdp = sb->s_fs_info;
438 struct gfs2_args args = sdp->sd_args; /* Default to current settings */
438 int error; 439 int error;
439 440
440 error = gfs2_mount_args(sdp, data, 1); 441 error = gfs2_mount_args(sdp, &args, data);
441 if (error) 442 if (error)
442 return error; 443 return error;
443 444
445 /* Not allowed to change locking details */
446 if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
447 strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
448 strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
449 return -EINVAL;
450
451 /* Some flags must not be changed */
452 if (args_neq(&args, &sdp->sd_args, spectator) ||
453 args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
454 args_neq(&args, &sdp->sd_args, localflocks) ||
455 args_neq(&args, &sdp->sd_args, localcaching) ||
456 args_neq(&args, &sdp->sd_args, meta))
457 return -EINVAL;
458
444 if (sdp->sd_args.ar_spectator) 459 if (sdp->sd_args.ar_spectator)
445 *flags |= MS_RDONLY; 460 *flags |= MS_RDONLY;
446 else { 461
447 if (*flags & MS_RDONLY) { 462 if ((sb->s_flags ^ *flags) & MS_RDONLY) {
448 if (!(sb->s_flags & MS_RDONLY)) 463 if (*flags & MS_RDONLY)
449 error = gfs2_make_fs_ro(sdp); 464 error = gfs2_make_fs_ro(sdp);
450 } else if (!(*flags & MS_RDONLY) && 465 else
451 (sb->s_flags & MS_RDONLY)) {
452 error = gfs2_make_fs_rw(sdp); 466 error = gfs2_make_fs_rw(sdp);
453 } 467 if (error)
468 return error;
454 } 469 }
455 470
456 return error; 471 sdp->sd_args = args;
472 if (sdp->sd_args.ar_posix_acl)
473 sb->s_flags |= MS_POSIXACL;
474 else
475 sb->s_flags &= ~MS_POSIXACL;
476 return 0;
457} 477}
458 478
459/** 479/**
@@ -588,6 +608,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
588 } 608 }
589 seq_printf(s, ",data=%s", state); 609 seq_printf(s, ",data=%s", state);
590 } 610 }
611 if (args->ar_discard)
612 seq_printf(s, ",discard");
591 613
592 return 0; 614 return 0;
593} 615}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b08d09696b3e..8d53f66b5bcc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -45,7 +45,6 @@
45#include <linux/fs.h> 45#include <linux/fs.h>
46#include <linux/bio.h> 46#include <linux/bio.h>
47#include <linux/gfs2_ondisk.h> 47#include <linux/gfs2_ondisk.h>
48#include <linux/lm_interface.h>
49#include <linux/kthread.h> 48#include <linux/kthread.h>
50#include <linux/freezer.h> 49#include <linux/freezer.h>
51 50
@@ -80,6 +79,51 @@ struct gfs2_quota_change_host {
80 u32 qc_id; 79 u32 qc_id;
81}; 80};
82 81
82static LIST_HEAD(qd_lru_list);
83static atomic_t qd_lru_count = ATOMIC_INIT(0);
84static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED;
85
86int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
87{
88 struct gfs2_quota_data *qd;
89 struct gfs2_sbd *sdp;
90
91 if (nr == 0)
92 goto out;
93
94 if (!(gfp_mask & __GFP_FS))
95 return -1;
96
97 spin_lock(&qd_lru_lock);
98 while (nr && !list_empty(&qd_lru_list)) {
99 qd = list_entry(qd_lru_list.next,
100 struct gfs2_quota_data, qd_reclaim);
101 sdp = qd->qd_gl->gl_sbd;
102
103 /* Free from the filesystem-specific list */
104 list_del(&qd->qd_list);
105
106 gfs2_assert_warn(sdp, !qd->qd_change);
107 gfs2_assert_warn(sdp, !qd->qd_slot_count);
108 gfs2_assert_warn(sdp, !qd->qd_bh_count);
109
110 gfs2_glock_put(qd->qd_gl);
111 atomic_dec(&sdp->sd_quota_count);
112
113 /* Delete it from the common reclaim list */
114 list_del_init(&qd->qd_reclaim);
115 atomic_dec(&qd_lru_count);
116 spin_unlock(&qd_lru_lock);
117 kmem_cache_free(gfs2_quotad_cachep, qd);
118 spin_lock(&qd_lru_lock);
119 nr--;
120 }
121 spin_unlock(&qd_lru_lock);
122
123out:
124 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
125}
126
83static u64 qd2offset(struct gfs2_quota_data *qd) 127static u64 qd2offset(struct gfs2_quota_data *qd)
84{ 128{
85 u64 offset; 129 u64 offset;
@@ -100,22 +144,18 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
100 if (!qd) 144 if (!qd)
101 return -ENOMEM; 145 return -ENOMEM;
102 146
103 qd->qd_count = 1; 147 atomic_set(&qd->qd_count, 1);
104 qd->qd_id = id; 148 qd->qd_id = id;
105 if (user) 149 if (user)
106 set_bit(QDF_USER, &qd->qd_flags); 150 set_bit(QDF_USER, &qd->qd_flags);
107 qd->qd_slot = -1; 151 qd->qd_slot = -1;
152 INIT_LIST_HEAD(&qd->qd_reclaim);
108 153
109 error = gfs2_glock_get(sdp, 2 * (u64)id + !user, 154 error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
110 &gfs2_quota_glops, CREATE, &qd->qd_gl); 155 &gfs2_quota_glops, CREATE, &qd->qd_gl);
111 if (error) 156 if (error)
112 goto fail; 157 goto fail;
113 158
114 error = gfs2_lvb_hold(qd->qd_gl);
115 gfs2_glock_put(qd->qd_gl);
116 if (error)
117 goto fail;
118
119 *qdp = qd; 159 *qdp = qd;
120 160
121 return 0; 161 return 0;
@@ -135,11 +175,17 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
135 175
136 for (;;) { 176 for (;;) {
137 found = 0; 177 found = 0;
138 spin_lock(&sdp->sd_quota_spin); 178 spin_lock(&qd_lru_lock);
139 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 179 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
140 if (qd->qd_id == id && 180 if (qd->qd_id == id &&
141 !test_bit(QDF_USER, &qd->qd_flags) == !user) { 181 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
142 qd->qd_count++; 182 if (!atomic_read(&qd->qd_count) &&
183 !list_empty(&qd->qd_reclaim)) {
184 /* Remove it from reclaim list */
185 list_del_init(&qd->qd_reclaim);
186 atomic_dec(&qd_lru_count);
187 }
188 atomic_inc(&qd->qd_count);
143 found = 1; 189 found = 1;
144 break; 190 break;
145 } 191 }
@@ -155,11 +201,11 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
155 new_qd = NULL; 201 new_qd = NULL;
156 } 202 }
157 203
158 spin_unlock(&sdp->sd_quota_spin); 204 spin_unlock(&qd_lru_lock);
159 205
160 if (qd || !create) { 206 if (qd || !create) {
161 if (new_qd) { 207 if (new_qd) {
162 gfs2_lvb_unhold(new_qd->qd_gl); 208 gfs2_glock_put(new_qd->qd_gl);
163 kmem_cache_free(gfs2_quotad_cachep, new_qd); 209 kmem_cache_free(gfs2_quotad_cachep, new_qd);
164 } 210 }
165 *qdp = qd; 211 *qdp = qd;
@@ -175,21 +221,18 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
175static void qd_hold(struct gfs2_quota_data *qd) 221static void qd_hold(struct gfs2_quota_data *qd)
176{ 222{
177 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 223 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
178 224 gfs2_assert(sdp, atomic_read(&qd->qd_count));
179 spin_lock(&sdp->sd_quota_spin); 225 atomic_inc(&qd->qd_count);
180 gfs2_assert(sdp, qd->qd_count);
181 qd->qd_count++;
182 spin_unlock(&sdp->sd_quota_spin);
183} 226}
184 227
185static void qd_put(struct gfs2_quota_data *qd) 228static void qd_put(struct gfs2_quota_data *qd)
186{ 229{
187 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 230 if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) {
188 spin_lock(&sdp->sd_quota_spin); 231 /* Add to the reclaim list */
189 gfs2_assert(sdp, qd->qd_count); 232 list_add_tail(&qd->qd_reclaim, &qd_lru_list);
190 if (!--qd->qd_count) 233 atomic_inc(&qd_lru_count);
191 qd->qd_last_touched = jiffies; 234 spin_unlock(&qd_lru_lock);
192 spin_unlock(&sdp->sd_quota_spin); 235 }
193} 236}
194 237
195static int slot_get(struct gfs2_quota_data *qd) 238static int slot_get(struct gfs2_quota_data *qd)
@@ -198,10 +241,10 @@ static int slot_get(struct gfs2_quota_data *qd)
198 unsigned int c, o = 0, b; 241 unsigned int c, o = 0, b;
199 unsigned char byte = 0; 242 unsigned char byte = 0;
200 243
201 spin_lock(&sdp->sd_quota_spin); 244 spin_lock(&qd_lru_lock);
202 245
203 if (qd->qd_slot_count++) { 246 if (qd->qd_slot_count++) {
204 spin_unlock(&sdp->sd_quota_spin); 247 spin_unlock(&qd_lru_lock);
205 return 0; 248 return 0;
206 } 249 }
207 250
@@ -225,13 +268,13 @@ found:
225 268
226 sdp->sd_quota_bitmap[c][o] |= 1 << b; 269 sdp->sd_quota_bitmap[c][o] |= 1 << b;
227 270
228 spin_unlock(&sdp->sd_quota_spin); 271 spin_unlock(&qd_lru_lock);
229 272
230 return 0; 273 return 0;
231 274
232fail: 275fail:
233 qd->qd_slot_count--; 276 qd->qd_slot_count--;
234 spin_unlock(&sdp->sd_quota_spin); 277 spin_unlock(&qd_lru_lock);
235 return -ENOSPC; 278 return -ENOSPC;
236} 279}
237 280
@@ -239,23 +282,23 @@ static void slot_hold(struct gfs2_quota_data *qd)
239{ 282{
240 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 283 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
241 284
242 spin_lock(&sdp->sd_quota_spin); 285 spin_lock(&qd_lru_lock);
243 gfs2_assert(sdp, qd->qd_slot_count); 286 gfs2_assert(sdp, qd->qd_slot_count);
244 qd->qd_slot_count++; 287 qd->qd_slot_count++;
245 spin_unlock(&sdp->sd_quota_spin); 288 spin_unlock(&qd_lru_lock);
246} 289}
247 290
248static void slot_put(struct gfs2_quota_data *qd) 291static void slot_put(struct gfs2_quota_data *qd)
249{ 292{
250 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 293 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
251 294
252 spin_lock(&sdp->sd_quota_spin); 295 spin_lock(&qd_lru_lock);
253 gfs2_assert(sdp, qd->qd_slot_count); 296 gfs2_assert(sdp, qd->qd_slot_count);
254 if (!--qd->qd_slot_count) { 297 if (!--qd->qd_slot_count) {
255 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); 298 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
256 qd->qd_slot = -1; 299 qd->qd_slot = -1;
257 } 300 }
258 spin_unlock(&sdp->sd_quota_spin); 301 spin_unlock(&qd_lru_lock);
259} 302}
260 303
261static int bh_get(struct gfs2_quota_data *qd) 304static int bh_get(struct gfs2_quota_data *qd)
@@ -330,7 +373,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
330 if (sdp->sd_vfs->s_flags & MS_RDONLY) 373 if (sdp->sd_vfs->s_flags & MS_RDONLY)
331 return 0; 374 return 0;
332 375
333 spin_lock(&sdp->sd_quota_spin); 376 spin_lock(&qd_lru_lock);
334 377
335 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 378 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
336 if (test_bit(QDF_LOCKED, &qd->qd_flags) || 379 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
@@ -341,8 +384,8 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
341 list_move_tail(&qd->qd_list, &sdp->sd_quota_list); 384 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
342 385
343 set_bit(QDF_LOCKED, &qd->qd_flags); 386 set_bit(QDF_LOCKED, &qd->qd_flags);
344 gfs2_assert_warn(sdp, qd->qd_count); 387 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
345 qd->qd_count++; 388 atomic_inc(&qd->qd_count);
346 qd->qd_change_sync = qd->qd_change; 389 qd->qd_change_sync = qd->qd_change;
347 gfs2_assert_warn(sdp, qd->qd_slot_count); 390 gfs2_assert_warn(sdp, qd->qd_slot_count);
348 qd->qd_slot_count++; 391 qd->qd_slot_count++;
@@ -354,7 +397,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
354 if (!found) 397 if (!found)
355 qd = NULL; 398 qd = NULL;
356 399
357 spin_unlock(&sdp->sd_quota_spin); 400 spin_unlock(&qd_lru_lock);
358 401
359 if (qd) { 402 if (qd) {
360 gfs2_assert_warn(sdp, qd->qd_change_sync); 403 gfs2_assert_warn(sdp, qd->qd_change_sync);
@@ -379,24 +422,24 @@ static int qd_trylock(struct gfs2_quota_data *qd)
379 if (sdp->sd_vfs->s_flags & MS_RDONLY) 422 if (sdp->sd_vfs->s_flags & MS_RDONLY)
380 return 0; 423 return 0;
381 424
382 spin_lock(&sdp->sd_quota_spin); 425 spin_lock(&qd_lru_lock);
383 426
384 if (test_bit(QDF_LOCKED, &qd->qd_flags) || 427 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
385 !test_bit(QDF_CHANGE, &qd->qd_flags)) { 428 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
386 spin_unlock(&sdp->sd_quota_spin); 429 spin_unlock(&qd_lru_lock);
387 return 0; 430 return 0;
388 } 431 }
389 432
390 list_move_tail(&qd->qd_list, &sdp->sd_quota_list); 433 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
391 434
392 set_bit(QDF_LOCKED, &qd->qd_flags); 435 set_bit(QDF_LOCKED, &qd->qd_flags);
393 gfs2_assert_warn(sdp, qd->qd_count); 436 gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
394 qd->qd_count++; 437 atomic_inc(&qd->qd_count);
395 qd->qd_change_sync = qd->qd_change; 438 qd->qd_change_sync = qd->qd_change;
396 gfs2_assert_warn(sdp, qd->qd_slot_count); 439 gfs2_assert_warn(sdp, qd->qd_slot_count);
397 qd->qd_slot_count++; 440 qd->qd_slot_count++;
398 441
399 spin_unlock(&sdp->sd_quota_spin); 442 spin_unlock(&qd_lru_lock);
400 443
401 gfs2_assert_warn(sdp, qd->qd_change_sync); 444 gfs2_assert_warn(sdp, qd->qd_change_sync);
402 if (bh_get(qd)) { 445 if (bh_get(qd)) {
@@ -556,9 +599,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
556 x = be64_to_cpu(qc->qc_change) + change; 599 x = be64_to_cpu(qc->qc_change) + change;
557 qc->qc_change = cpu_to_be64(x); 600 qc->qc_change = cpu_to_be64(x);
558 601
559 spin_lock(&sdp->sd_quota_spin); 602 spin_lock(&qd_lru_lock);
560 qd->qd_change = x; 603 qd->qd_change = x;
561 spin_unlock(&sdp->sd_quota_spin); 604 spin_unlock(&qd_lru_lock);
562 605
563 if (!x) { 606 if (!x) {
564 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); 607 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
@@ -802,8 +845,8 @@ restart:
802 loff_t pos; 845 loff_t pos;
803 gfs2_glock_dq_uninit(q_gh); 846 gfs2_glock_dq_uninit(q_gh);
804 error = gfs2_glock_nq_init(qd->qd_gl, 847 error = gfs2_glock_nq_init(qd->qd_gl,
805 LM_ST_EXCLUSIVE, GL_NOCACHE, 848 LM_ST_EXCLUSIVE, GL_NOCACHE,
806 q_gh); 849 q_gh);
807 if (error) 850 if (error)
808 return error; 851 return error;
809 852
@@ -820,7 +863,6 @@ restart:
820 863
821 gfs2_glock_dq_uninit(&i_gh); 864 gfs2_glock_dq_uninit(&i_gh);
822 865
823
824 gfs2_quota_in(&q, buf); 866 gfs2_quota_in(&q, buf);
825 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 867 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
826 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); 868 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
@@ -890,9 +932,9 @@ static int need_sync(struct gfs2_quota_data *qd)
890 if (!qd->qd_qb.qb_limit) 932 if (!qd->qd_qb.qb_limit)
891 return 0; 933 return 0;
892 934
893 spin_lock(&sdp->sd_quota_spin); 935 spin_lock(&qd_lru_lock);
894 value = qd->qd_change; 936 value = qd->qd_change;
895 spin_unlock(&sdp->sd_quota_spin); 937 spin_unlock(&qd_lru_lock);
896 938
897 spin_lock(&gt->gt_spin); 939 spin_lock(&gt->gt_spin);
898 num = gt->gt_quota_scale_num; 940 num = gt->gt_quota_scale_num;
@@ -985,9 +1027,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
985 continue; 1027 continue;
986 1028
987 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1029 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
988 spin_lock(&sdp->sd_quota_spin); 1030 spin_lock(&qd_lru_lock);
989 value += qd->qd_change; 1031 value += qd->qd_change;
990 spin_unlock(&sdp->sd_quota_spin); 1032 spin_unlock(&qd_lru_lock);
991 1033
992 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1034 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
993 print_message(qd, "exceeded"); 1035 print_message(qd, "exceeded");
@@ -1171,13 +1213,12 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
1171 qd->qd_change = qc.qc_change; 1213 qd->qd_change = qc.qc_change;
1172 qd->qd_slot = slot; 1214 qd->qd_slot = slot;
1173 qd->qd_slot_count = 1; 1215 qd->qd_slot_count = 1;
1174 qd->qd_last_touched = jiffies;
1175 1216
1176 spin_lock(&sdp->sd_quota_spin); 1217 spin_lock(&qd_lru_lock);
1177 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); 1218 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
1178 list_add(&qd->qd_list, &sdp->sd_quota_list); 1219 list_add(&qd->qd_list, &sdp->sd_quota_list);
1179 atomic_inc(&sdp->sd_quota_count); 1220 atomic_inc(&sdp->sd_quota_count);
1180 spin_unlock(&sdp->sd_quota_spin); 1221 spin_unlock(&qd_lru_lock);
1181 1222
1182 found++; 1223 found++;
1183 } 1224 }
@@ -1197,73 +1238,48 @@ fail:
1197 return error; 1238 return error;
1198} 1239}
1199 1240
1200static void gfs2_quota_scan(struct gfs2_sbd *sdp)
1201{
1202 struct gfs2_quota_data *qd, *safe;
1203 LIST_HEAD(dead);
1204
1205 spin_lock(&sdp->sd_quota_spin);
1206 list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
1207 if (!qd->qd_count &&
1208 time_after_eq(jiffies, qd->qd_last_touched +
1209 gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
1210 list_move(&qd->qd_list, &dead);
1211 gfs2_assert_warn(sdp,
1212 atomic_read(&sdp->sd_quota_count) > 0);
1213 atomic_dec(&sdp->sd_quota_count);
1214 }
1215 }
1216 spin_unlock(&sdp->sd_quota_spin);
1217
1218 while (!list_empty(&dead)) {
1219 qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
1220 list_del(&qd->qd_list);
1221
1222 gfs2_assert_warn(sdp, !qd->qd_change);
1223 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1224 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1225
1226 gfs2_lvb_unhold(qd->qd_gl);
1227 kmem_cache_free(gfs2_quotad_cachep, qd);
1228 }
1229}
1230
1231void gfs2_quota_cleanup(struct gfs2_sbd *sdp) 1241void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
1232{ 1242{
1233 struct list_head *head = &sdp->sd_quota_list; 1243 struct list_head *head = &sdp->sd_quota_list;
1234 struct gfs2_quota_data *qd; 1244 struct gfs2_quota_data *qd;
1235 unsigned int x; 1245 unsigned int x;
1236 1246
1237 spin_lock(&sdp->sd_quota_spin); 1247 spin_lock(&qd_lru_lock);
1238 while (!list_empty(head)) { 1248 while (!list_empty(head)) {
1239 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); 1249 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
1240 1250
1241 if (qd->qd_count > 1 || 1251 if (atomic_read(&qd->qd_count) > 1 ||
1242 (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) { 1252 (atomic_read(&qd->qd_count) &&
1253 !test_bit(QDF_CHANGE, &qd->qd_flags))) {
1243 list_move(&qd->qd_list, head); 1254 list_move(&qd->qd_list, head);
1244 spin_unlock(&sdp->sd_quota_spin); 1255 spin_unlock(&qd_lru_lock);
1245 schedule(); 1256 schedule();
1246 spin_lock(&sdp->sd_quota_spin); 1257 spin_lock(&qd_lru_lock);
1247 continue; 1258 continue;
1248 } 1259 }
1249 1260
1250 list_del(&qd->qd_list); 1261 list_del(&qd->qd_list);
1262 /* Also remove if this qd exists in the reclaim list */
1263 if (!list_empty(&qd->qd_reclaim)) {
1264 list_del_init(&qd->qd_reclaim);
1265 atomic_dec(&qd_lru_count);
1266 }
1251 atomic_dec(&sdp->sd_quota_count); 1267 atomic_dec(&sdp->sd_quota_count);
1252 spin_unlock(&sdp->sd_quota_spin); 1268 spin_unlock(&qd_lru_lock);
1253 1269
1254 if (!qd->qd_count) { 1270 if (!atomic_read(&qd->qd_count)) {
1255 gfs2_assert_warn(sdp, !qd->qd_change); 1271 gfs2_assert_warn(sdp, !qd->qd_change);
1256 gfs2_assert_warn(sdp, !qd->qd_slot_count); 1272 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1257 } else 1273 } else
1258 gfs2_assert_warn(sdp, qd->qd_slot_count == 1); 1274 gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
1259 gfs2_assert_warn(sdp, !qd->qd_bh_count); 1275 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1260 1276
1261 gfs2_lvb_unhold(qd->qd_gl); 1277 gfs2_glock_put(qd->qd_gl);
1262 kmem_cache_free(gfs2_quotad_cachep, qd); 1278 kmem_cache_free(gfs2_quotad_cachep, qd);
1263 1279
1264 spin_lock(&sdp->sd_quota_spin); 1280 spin_lock(&qd_lru_lock);
1265 } 1281 }
1266 spin_unlock(&sdp->sd_quota_spin); 1282 spin_unlock(&qd_lru_lock);
1267 1283
1268 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); 1284 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
1269 1285
@@ -1341,9 +1357,6 @@ int gfs2_quotad(void *data)
1341 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, 1357 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
1342 &quotad_timeo, &tune->gt_quota_quantum); 1358 &quotad_timeo, &tune->gt_quota_quantum);
1343 1359
1344 /* FIXME: This should be turned into a shrinker */
1345 gfs2_quota_scan(sdp);
1346
1347 /* Check for & recover partially truncated inodes */ 1360 /* Check for & recover partially truncated inodes */
1348 quotad_check_trunc_list(sdp); 1361 quotad_check_trunc_list(sdp);
1349 1362
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index cec9032be97d..0fa5fa63d0e8 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -49,4 +49,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
49 return ret; 49 return ret;
50} 50}
51 51
52extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
53
52#endif /* __QUOTA_DOT_H__ */ 54#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index efd09c3d2b26..247e8f7d6b3d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/lm_interface.h>
17#include <linux/kthread.h> 16#include <linux/kthread.h>
18#include <linux/freezer.h> 17#include <linux/freezer.h>
19 18
@@ -427,20 +426,23 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
427} 426}
428 427
429 428
430static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, 429static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
431 unsigned int message) 430 unsigned int message)
432{ 431{
433 if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done) 432 char env_jid[20];
434 return; 433 char env_status[20];
435 434 char *envp[] = { env_jid, env_status, NULL };
436 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 435 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
437 sdp->sd_lockstruct.ls_ops->lm_recovery_done( 436 ls->ls_recover_jid_done = jid;
438 sdp->sd_lockstruct.ls_lockspace, jid, message); 437 ls->ls_recover_jid_status = message;
438 sprintf(env_jid, "JID=%d", jid);
439 sprintf(env_status, "RECOVERY=%s",
440 message == LM_RD_SUCCESS ? "Done" : "Failed");
441 kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
439} 442}
440 443
441
442/** 444/**
443 * gfs2_recover_journal - recovery a given journal 445 * gfs2_recover_journal - recover a given journal
444 * @jd: the struct gfs2_jdesc describing the journal 446 * @jd: the struct gfs2_jdesc describing the journal
445 * 447 *
446 * Acquire the journal's lock, check to see if the journal is clean, and 448 * Acquire the journal's lock, check to see if the journal is clean, and
@@ -561,7 +563,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
561 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) 563 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
562 gfs2_glock_dq_uninit(&ji_gh); 564 gfs2_glock_dq_uninit(&ji_gh);
563 565
564 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); 566 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
565 567
566 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) 568 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
567 gfs2_glock_dq_uninit(&j_gh); 569 gfs2_glock_dq_uninit(&j_gh);
@@ -581,7 +583,7 @@ fail_gunlock_j:
581 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); 583 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
582 584
583fail: 585fail:
584 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); 586 gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
585 return error; 587 return error;
586} 588}
587 589
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b01c635d925..f03d024038ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -13,8 +13,8 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17#include <linux/prefetch.h> 16#include <linux/prefetch.h>
17#include <linux/blkdev.h>
18 18
19#include "gfs2.h" 19#include "gfs2.h"
20#include "incore.h" 20#include "incore.h"
@@ -132,81 +132,90 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
132} 132}
133 133
134/** 134/**
135 * gfs2_bit_search
136 * @ptr: Pointer to bitmap data
137 * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
138 * @state: The state we are searching for
139 *
140 * We xor the bitmap data with a pattern which is the bitwise opposite
141 * of what we are looking for, this gives rise to a pattern of ones
142 * wherever there is a match. Since we have two bits per entry, we
143 * take this pattern, shift it down by one place and then AND it with
144 * the original. All the even bit positions (0,2,4, etc) then represent
145 * successful matches, so we mask with 0x55555..... to remove the unwanted
146 * odd bit positions.
147 *
148 * This allows searching of a whole u64 at once (32 blocks) with a
149 * single test (on 64 bit arches).
150 */
151
152static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
153{
154 u64 tmp;
155 static const u64 search[] = {
156 [0] = 0xffffffffffffffffULL,
157 [1] = 0xaaaaaaaaaaaaaaaaULL,
158 [2] = 0x5555555555555555ULL,
159 [3] = 0x0000000000000000ULL,
160 };
161 tmp = le64_to_cpu(*ptr) ^ search[state];
162 tmp &= (tmp >> 1);
163 tmp &= mask;
164 return tmp;
165}
166
167/**
135 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 168 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
136 * a block in a given allocation state. 169 * a block in a given allocation state.
137 * @buffer: the buffer that holds the bitmaps 170 * @buffer: the buffer that holds the bitmaps
138 * @buflen: the length (in bytes) of the buffer 171 * @len: the length (in bytes) of the buffer
139 * @goal: start search at this block's bit-pair (within @buffer) 172 * @goal: start search at this block's bit-pair (within @buffer)
140 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for. 173 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
141 * 174 *
142 * Scope of @goal and returned block number is only within this bitmap buffer, 175 * Scope of @goal and returned block number is only within this bitmap buffer,
143 * not entire rgrp or filesystem. @buffer will be offset from the actual 176 * not entire rgrp or filesystem. @buffer will be offset from the actual
144 * beginning of a bitmap block buffer, skipping any header structures. 177 * beginning of a bitmap block buffer, skipping any header structures, but
178 * headers are always a multiple of 64 bits long so that the buffer is
179 * always aligned to a 64 bit boundary.
180 *
181 * The size of the buffer is in bytes, but it is assumed that it is
182 * always ok to read a complete multiple of 64 bits at the end
183 * of the block in case the end is not aligned to a natural boundary.
145 * 184 *
146 * Return: the block number (bitmap buffer scope) that was found 185 * Return: the block number (bitmap buffer scope) that was found
147 */ 186 */
148 187
149static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal, 188static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
150 u8 old_state) 189 u32 goal, u8 state)
151{ 190{
152 const u8 *byte, *start, *end; 191 u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
153 int bit, startbit; 192 const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
154 u32 g1, g2, misaligned; 193 const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
155 unsigned long *plong; 194 u64 tmp;
156 unsigned long lskipval; 195 u64 mask = 0x5555555555555555ULL;
157 196 u32 bit;
158 lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55; 197
159 g1 = (goal / GFS2_NBBY); 198 BUG_ON(state > 3);
160 start = buffer + g1; 199
161 byte = start; 200 /* Mask off bits we don't care about at the start of the search */
162 end = buffer + buflen; 201 mask <<= spoint;
163 g2 = ALIGN(g1, sizeof(unsigned long)); 202 tmp = gfs2_bit_search(ptr, mask, state);
164 plong = (unsigned long *)(buffer + g2); 203 ptr++;
165 startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; 204 while(tmp == 0 && ptr < end) {
166 misaligned = g2 - g1; 205 tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
167 if (!misaligned) 206 ptr++;
168 goto ulong_aligned;
169/* parse the bitmap a byte at a time */
170misaligned:
171 while (byte < end) {
172 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
173 return goal +
174 (((byte - start) * GFS2_NBBY) +
175 ((bit - startbit) >> 1));
176 }
177 bit += GFS2_BIT_SIZE;
178 if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
179 bit = 0;
180 byte++;
181 misaligned--;
182 if (!misaligned) {
183 plong = (unsigned long *)byte;
184 goto ulong_aligned;
185 }
186 }
187 }
188 return BFITNOENT;
189
190/* parse the bitmap a unsigned long at a time */
191ulong_aligned:
192 /* Stop at "end - 1" or else prefetch can go past the end and segfault.
193 We could "if" it but we'd lose some of the performance gained.
194 This way will only slow down searching the very last 4/8 bytes
195 depending on architecture. I've experimented with several ways
196 of writing this section such as using an else before the goto
197 but this one seems to be the fastest. */
198 while ((unsigned char *)plong < end - sizeof(unsigned long)) {
199 prefetch(plong + 1);
200 if (((*plong) & LBITMASK) != lskipval)
201 break;
202 plong++;
203 }
204 if ((unsigned char *)plong < end) {
205 byte = (const u8 *)plong;
206 misaligned += sizeof(unsigned long) - 1;
207 goto misaligned;
208 } 207 }
209 return BFITNOENT; 208 /* Mask off any bits which are more than len bytes from the start */
209 if (ptr == end && (len & (sizeof(u64) - 1)))
210 tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
211 /* Didn't find anything, so return */
212 if (tmp == 0)
213 return BFITNOENT;
214 ptr--;
215 bit = fls64(tmp);
216 bit--; /* fls64 always adds one to the bit count */
217 bit /= 2; /* two bits per entry in the bitmap */
218 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
210} 219}
211 220
212/** 221/**
@@ -831,6 +840,58 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
831 spin_unlock(&sdp->sd_rindex_spin); 840 spin_unlock(&sdp->sd_rindex_spin);
832} 841}
833 842
843static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
844 const struct gfs2_bitmap *bi)
845{
846 struct super_block *sb = sdp->sd_vfs;
847 struct block_device *bdev = sb->s_bdev;
848 const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
849 bdev_hardsect_size(sb->s_bdev);
850 u64 blk;
851 sector_t start = 0;
852 sector_t nr_sects = 0;
853 int rv;
854 unsigned int x;
855
856 for (x = 0; x < bi->bi_len; x++) {
857 const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
858 const u8 *clone = bi->bi_clone + bi->bi_offset + x;
859 u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
860 diff &= 0x55;
861 if (diff == 0)
862 continue;
863 blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
864 blk *= sects_per_blk; /* convert to sectors */
865 while(diff) {
866 if (diff & 1) {
867 if (nr_sects == 0)
868 goto start_new_extent;
869 if ((start + nr_sects) != blk) {
870 rv = blkdev_issue_discard(bdev, start,
871 nr_sects, GFP_NOFS);
872 if (rv)
873 goto fail;
874 nr_sects = 0;
875start_new_extent:
876 start = blk;
877 }
878 nr_sects += sects_per_blk;
879 }
880 diff >>= 2;
881 blk += sects_per_blk;
882 }
883 }
884 if (nr_sects) {
885 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS);
886 if (rv)
887 goto fail;
888 }
889 return;
890fail:
891 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
892 sdp->sd_args.ar_discard = 0;
893}
894
834void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) 895void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
835{ 896{
836 struct gfs2_sbd *sdp = rgd->rd_sbd; 897 struct gfs2_sbd *sdp = rgd->rd_sbd;
@@ -841,6 +902,8 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
841 struct gfs2_bitmap *bi = rgd->rd_bits + x; 902 struct gfs2_bitmap *bi = rgd->rd_bits + x;
842 if (!bi->bi_clone) 903 if (!bi->bi_clone)
843 continue; 904 continue;
905 if (sdp->sd_args.ar_discard)
906 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
844 memcpy(bi->bi_clone + bi->bi_offset, 907 memcpy(bi->bi_clone + bi->bi_offset,
845 bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); 908 bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
846 } 909 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 141b781f2fcc..601913e0a482 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -15,7 +15,6 @@
15#include <linux/crc32.h> 15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/bio.h> 17#include <linux/bio.h>
18#include <linux/lm_interface.h>
19 18
20#include "gfs2.h" 19#include "gfs2.h"
21#include "incore.h" 20#include "incore.h"
@@ -339,7 +338,6 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
339 struct gfs2_holder *t_gh) 338 struct gfs2_holder *t_gh)
340{ 339{
341 struct gfs2_inode *ip; 340 struct gfs2_inode *ip;
342 struct gfs2_holder ji_gh;
343 struct gfs2_jdesc *jd; 341 struct gfs2_jdesc *jd;
344 struct lfcc *lfcc; 342 struct lfcc *lfcc;
345 LIST_HEAD(list); 343 LIST_HEAD(list);
@@ -387,7 +385,6 @@ out:
387 gfs2_glock_dq_uninit(&lfcc->gh); 385 gfs2_glock_dq_uninit(&lfcc->gh);
388 kfree(lfcc); 386 kfree(lfcc);
389 } 387 }
390 gfs2_glock_dq_uninit(&ji_gh);
391 return error; 388 return error;
392} 389}
393 390
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index f6b8b00ad881..91abdbedcc86 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -14,7 +14,7 @@
14#include <linux/dcache.h> 14#include <linux/dcache.h>
15#include "incore.h" 15#include "incore.h"
16 16
17void gfs2_lm_unmount(struct gfs2_sbd *sdp); 17extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
18 18
19static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) 19static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
20{ 20{
@@ -27,21 +27,23 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
27 27
28void gfs2_jindex_free(struct gfs2_sbd *sdp); 28void gfs2_jindex_free(struct gfs2_sbd *sdp);
29 29
30struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); 30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data);
31int gfs2_jdesc_check(struct gfs2_jdesc *jd);
32 31
33int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, 32extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
34 struct gfs2_inode **ipp); 33extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
35 34
36int gfs2_make_fs_rw(struct gfs2_sbd *sdp); 35extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
36 struct gfs2_inode **ipp);
37 37
38int gfs2_statfs_init(struct gfs2_sbd *sdp); 38extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
39void gfs2_statfs_change(struct gfs2_sbd *sdp,
40 s64 total, s64 free, s64 dinodes);
41int gfs2_statfs_sync(struct gfs2_sbd *sdp);
42 39
43int gfs2_freeze_fs(struct gfs2_sbd *sdp); 40extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
44void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); 41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
42 s64 dinodes);
43extern int gfs2_statfs_sync(struct gfs2_sbd *sdp);
44
45extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
46extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
45 47
46extern struct file_system_type gfs2_fs_type; 48extern struct file_system_type gfs2_fs_type;
47extern struct file_system_type gfs2meta_fs_type; 49extern struct file_system_type gfs2meta_fs_type;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 26c1fa777a95..7655f5025fec 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -14,9 +14,8 @@
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/lm_interface.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <linux/gfs2_ondisk.h>
20 19
21#include "gfs2.h" 20#include "gfs2.h"
22#include "incore.h" 21#include "incore.h"
@@ -25,6 +24,7 @@
25#include "glock.h" 24#include "glock.h"
26#include "quota.h" 25#include "quota.h"
27#include "util.h" 26#include "util.h"
27#include "glops.h"
28 28
29static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) 29static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
30{ 30{
@@ -37,6 +37,30 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
37 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); 37 return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
38} 38}
39 39
40static int gfs2_uuid_valid(const u8 *uuid)
41{
42 int i;
43
44 for (i = 0; i < 16; i++) {
45 if (uuid[i])
46 return 1;
47 }
48 return 0;
49}
50
51static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
52{
53 const u8 *uuid = sdp->sd_sb.sb_uuid;
54 buf[0] = '\0';
55 if (!gfs2_uuid_valid(uuid))
56 return 0;
57 return snprintf(buf, PAGE_SIZE, "%02X%02X%02X%02X-%02X%02X-"
58 "%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
59 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5],
60 uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11],
61 uuid[12], uuid[13], uuid[14], uuid[15]);
62}
63
40static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 64static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
41{ 65{
42 unsigned int count; 66 unsigned int count;
@@ -148,6 +172,46 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
148 return len; 172 return len;
149} 173}
150 174
175static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
176{
177 struct gfs2_glock *gl;
178 const struct gfs2_glock_operations *glops;
179 unsigned int glmode;
180 unsigned int gltype;
181 unsigned long long glnum;
182 char mode[16];
183 int rv;
184
185 if (!capable(CAP_SYS_ADMIN))
186 return -EACCES;
187
188 rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
189 mode);
190 if (rv != 3)
191 return -EINVAL;
192
193 if (strcmp(mode, "EX") == 0)
194 glmode = LM_ST_UNLOCKED;
195 else if ((strcmp(mode, "CW") == 0) || (strcmp(mode, "DF") == 0))
196 glmode = LM_ST_DEFERRED;
197 else if ((strcmp(mode, "PR") == 0) || (strcmp(mode, "SH") == 0))
198 glmode = LM_ST_SHARED;
199 else
200 return -EINVAL;
201
202 if (gltype > LM_TYPE_JOURNAL)
203 return -EINVAL;
204 glops = gfs2_glops_list[gltype];
205 if (glops == NULL)
206 return -EINVAL;
207 rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
208 if (rv)
209 return rv;
210 gfs2_glock_cb(gl, glmode);
211 gfs2_glock_put(gl);
212 return len;
213}
214
151struct gfs2_attr { 215struct gfs2_attr {
152 struct attribute attr; 216 struct attribute attr;
153 ssize_t (*show)(struct gfs2_sbd *, char *); 217 ssize_t (*show)(struct gfs2_sbd *, char *);
@@ -159,22 +223,26 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
159 223
160GFS2_ATTR(id, 0444, id_show, NULL); 224GFS2_ATTR(id, 0444, id_show, NULL);
161GFS2_ATTR(fsname, 0444, fsname_show, NULL); 225GFS2_ATTR(fsname, 0444, fsname_show, NULL);
226GFS2_ATTR(uuid, 0444, uuid_show, NULL);
162GFS2_ATTR(freeze, 0644, freeze_show, freeze_store); 227GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
163GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 228GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
164GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store); 229GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
165GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store); 230GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
166GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store); 231GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
167GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store); 232GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
233GFS2_ATTR(demote_rq, 0200, NULL, demote_rq_store);
168 234
169static struct attribute *gfs2_attrs[] = { 235static struct attribute *gfs2_attrs[] = {
170 &gfs2_attr_id.attr, 236 &gfs2_attr_id.attr,
171 &gfs2_attr_fsname.attr, 237 &gfs2_attr_fsname.attr,
238 &gfs2_attr_uuid.attr,
172 &gfs2_attr_freeze.attr, 239 &gfs2_attr_freeze.attr,
173 &gfs2_attr_withdraw.attr, 240 &gfs2_attr_withdraw.attr,
174 &gfs2_attr_statfs_sync.attr, 241 &gfs2_attr_statfs_sync.attr,
175 &gfs2_attr_quota_sync.attr, 242 &gfs2_attr_quota_sync.attr,
176 &gfs2_attr_quota_refresh_user.attr, 243 &gfs2_attr_quota_refresh_user.attr,
177 &gfs2_attr_quota_refresh_group.attr, 244 &gfs2_attr_quota_refresh_group.attr,
245 &gfs2_attr_demote_rq.attr,
178 NULL, 246 NULL,
179}; 247};
180 248
@@ -224,14 +292,145 @@ static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
224 292
225LOCKSTRUCT_ATTR(jid, "%u\n"); 293LOCKSTRUCT_ATTR(jid, "%u\n");
226LOCKSTRUCT_ATTR(first, "%u\n"); 294LOCKSTRUCT_ATTR(first, "%u\n");
227LOCKSTRUCT_ATTR(lvb_size, "%u\n");
228LOCKSTRUCT_ATTR(flags, "%d\n");
229 295
230static struct attribute *lockstruct_attrs[] = { 296static struct attribute *lockstruct_attrs[] = {
231 &lockstruct_attr_jid.attr, 297 &lockstruct_attr_jid.attr,
232 &lockstruct_attr_first.attr, 298 &lockstruct_attr_first.attr,
233 &lockstruct_attr_lvb_size.attr, 299 NULL,
234 &lockstruct_attr_flags.attr, 300};
301
302/*
303 * lock_module. Originally from lock_dlm
304 */
305
306static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf)
307{
308 const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops;
309 return sprintf(buf, "%s\n", ops->lm_proto_name);
310}
311
312static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
313{
314 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
315 ssize_t ret;
316 int val = 0;
317
318 if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
319 val = 1;
320 ret = sprintf(buf, "%d\n", val);
321 return ret;
322}
323
324static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
325{
326 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
327 ssize_t ret = len;
328 int val;
329
330 val = simple_strtol(buf, NULL, 0);
331
332 if (val == 1)
333 set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
334 else if (val == 0) {
335 clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
336 smp_mb__after_clear_bit();
337 gfs2_glock_thaw(sdp);
338 } else {
339 ret = -EINVAL;
340 }
341 return ret;
342}
343
344static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf)
345{
346 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
347 return sprintf(buf, "%u\n", ls->ls_id);
348}
349
350static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
351{
352 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
353 return sprintf(buf, "%d\n", ls->ls_first);
354}
355
356static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
357{
358 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
359 return sprintf(buf, "%d\n", ls->ls_first_done);
360}
361
362static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf)
363{
364 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
365 return sprintf(buf, "%d\n", ls->ls_recover_jid);
366}
367
368static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
369{
370 struct gfs2_jdesc *jd;
371
372 spin_lock(&sdp->sd_jindex_spin);
373 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
374 if (jd->jd_jid != jid)
375 continue;
376 jd->jd_dirty = 1;
377 break;
378 }
379 spin_unlock(&sdp->sd_jindex_spin);
380}
381
382static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
383{
384 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
385 ls->ls_recover_jid = simple_strtol(buf, NULL, 0);
386 gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid);
387 if (sdp->sd_recoverd_process)
388 wake_up_process(sdp->sd_recoverd_process);
389 return len;
390}
391
392static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
393{
394 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
395 return sprintf(buf, "%d\n", ls->ls_recover_jid_done);
396}
397
398static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
399{
400 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
401 return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
402}
403
404struct gdlm_attr {
405 struct attribute attr;
406 ssize_t (*show)(struct gfs2_sbd *sdp, char *);
407 ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t);
408};
409
410#define GDLM_ATTR(_name,_mode,_show,_store) \
411static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
412
413GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
414GDLM_ATTR(block, 0644, block_show, block_store);
415GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
416GDLM_ATTR(id, 0444, lkid_show, NULL);
417GDLM_ATTR(first, 0444, lkfirst_show, NULL);
418GDLM_ATTR(first_done, 0444, first_done_show, NULL);
419GDLM_ATTR(recover, 0644, recover_show, recover_store);
420GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
421GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
422
423static struct attribute *lock_module_attrs[] = {
424 &gdlm_attr_proto_name.attr,
425 &gdlm_attr_block.attr,
426 &gdlm_attr_withdraw.attr,
427 &gdlm_attr_id.attr,
428 &lockstruct_attr_jid.attr,
429 &gdlm_attr_first.attr,
430 &gdlm_attr_first_done.attr,
431 &gdlm_attr_recover.attr,
432 &gdlm_attr_recover_done.attr,
433 &gdlm_attr_recover_status.attr,
235 NULL, 434 NULL,
236}; 435};
237 436
@@ -373,7 +572,6 @@ TUNE_ATTR(complain_secs, 0);
373TUNE_ATTR(statfs_slow, 0); 572TUNE_ATTR(statfs_slow, 0);
374TUNE_ATTR(new_files_jdata, 0); 573TUNE_ATTR(new_files_jdata, 0);
375TUNE_ATTR(quota_simul_sync, 1); 574TUNE_ATTR(quota_simul_sync, 1);
376TUNE_ATTR(quota_cache_secs, 1);
377TUNE_ATTR(stall_secs, 1); 575TUNE_ATTR(stall_secs, 1);
378TUNE_ATTR(statfs_quantum, 1); 576TUNE_ATTR(statfs_quantum, 1);
379TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); 577TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -389,7 +587,6 @@ static struct attribute *tune_attrs[] = {
389 &tune_attr_complain_secs.attr, 587 &tune_attr_complain_secs.attr,
390 &tune_attr_statfs_slow.attr, 588 &tune_attr_statfs_slow.attr,
391 &tune_attr_quota_simul_sync.attr, 589 &tune_attr_quota_simul_sync.attr,
392 &tune_attr_quota_cache_secs.attr,
393 &tune_attr_stall_secs.attr, 590 &tune_attr_stall_secs.attr,
394 &tune_attr_statfs_quantum.attr, 591 &tune_attr_statfs_quantum.attr,
395 &tune_attr_recoverd_secs.attr, 592 &tune_attr_recoverd_secs.attr,
@@ -414,6 +611,11 @@ static struct attribute_group tune_group = {
414 .attrs = tune_attrs, 611 .attrs = tune_attrs,
415}; 612};
416 613
614static struct attribute_group lock_module_group = {
615 .name = "lock_module",
616 .attrs = lock_module_attrs,
617};
618
417int gfs2_sys_fs_add(struct gfs2_sbd *sdp) 619int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
418{ 620{
419 int error; 621 int error;
@@ -436,9 +638,15 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
436 if (error) 638 if (error)
437 goto fail_args; 639 goto fail_args;
438 640
641 error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
642 if (error)
643 goto fail_tune;
644
439 kobject_uevent(&sdp->sd_kobj, KOBJ_ADD); 645 kobject_uevent(&sdp->sd_kobj, KOBJ_ADD);
440 return 0; 646 return 0;
441 647
648fail_tune:
649 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
442fail_args: 650fail_args:
443 sysfs_remove_group(&sdp->sd_kobj, &args_group); 651 sysfs_remove_group(&sdp->sd_kobj, &args_group);
444fail_lockstruct: 652fail_lockstruct:
@@ -455,15 +663,27 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
455 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 663 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
456 sysfs_remove_group(&sdp->sd_kobj, &args_group); 664 sysfs_remove_group(&sdp->sd_kobj, &args_group);
457 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group); 665 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
666 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
458 kobject_put(&sdp->sd_kobj); 667 kobject_put(&sdp->sd_kobj);
459} 668}
460 669
670
461static int gfs2_uevent(struct kset *kset, struct kobject *kobj, 671static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
462 struct kobj_uevent_env *env) 672 struct kobj_uevent_env *env)
463{ 673{
464 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); 674 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
675 const u8 *uuid = sdp->sd_sb.sb_uuid;
676
465 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 677 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
466 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 678 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
679 if (gfs2_uuid_valid(uuid)) {
680 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-"
681 "%02X%02X-%02X%02X%02X%02X%02X%02X",
682 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4],
683 uuid[5], uuid[6], uuid[7], uuid[8], uuid[9],
684 uuid[10], uuid[11], uuid[12], uuid[13],
685 uuid[14], uuid[15]);
686 }
467 return 0; 687 return 0;
468} 688}
469 689
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f677b8a83f0c..053752d4b27f 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -12,9 +12,8 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
17#include <linux/lm_interface.h> 16#include <linux/gfs2_ondisk.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
20#include "incore.h" 19#include "incore.h"
@@ -88,9 +87,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
88 87
89 if (!tr->tr_touched) { 88 if (!tr->tr_touched) {
90 gfs2_log_release(sdp, tr->tr_reserved); 89 gfs2_log_release(sdp, tr->tr_reserved);
91 gfs2_glock_dq(&tr->tr_t_gh); 90 if (tr->tr_t_gh.gh_gl) {
92 gfs2_holder_uninit(&tr->tr_t_gh); 91 gfs2_glock_dq(&tr->tr_t_gh);
93 kfree(tr); 92 gfs2_holder_uninit(&tr->tr_t_gh);
93 kfree(tr);
94 }
94 return; 95 return;
95 } 96 }
96 97
@@ -106,9 +107,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
106 } 107 }
107 108
108 gfs2_log_commit(sdp, tr); 109 gfs2_log_commit(sdp, tr);
109 gfs2_glock_dq(&tr->tr_t_gh); 110 if (tr->tr_t_gh.gh_gl) {
110 gfs2_holder_uninit(&tr->tr_t_gh); 111 gfs2_glock_dq(&tr->tr_t_gh);
111 kfree(tr); 112 gfs2_holder_uninit(&tr->tr_t_gh);
113 kfree(tr);
114 }
112 115
113 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) 116 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
114 gfs2_log_flush(sdp, NULL); 117 gfs2_log_flush(sdp, NULL);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 374f50e95496..9d12b1118ba0 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -13,7 +13,6 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/crc32.h> 14#include <linux/crc32.h>
15#include <linux/gfs2_ondisk.h> 15#include <linux/gfs2_ondisk.h>
16#include <linux/lm_interface.h>
17#include <asm/uaccess.h> 16#include <asm/uaccess.h>
18 17
19#include "gfs2.h" 18#include "gfs2.h"
@@ -35,6 +34,8 @@ void gfs2_assert_i(struct gfs2_sbd *sdp)
35 34
36int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) 35int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
37{ 36{
37 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
38 const struct lm_lockops *lm = ls->ls_ops;
38 va_list args; 39 va_list args;
39 40
40 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 41 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
@@ -47,8 +48,12 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
47 fs_err(sdp, "about to withdraw this file system\n"); 48 fs_err(sdp, "about to withdraw this file system\n");
48 BUG_ON(sdp->sd_args.ar_debug); 49 BUG_ON(sdp->sd_args.ar_debug);
49 50
50 fs_err(sdp, "telling LM to withdraw\n"); 51 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
51 gfs2_withdraw_lockproto(&sdp->sd_lockstruct); 52
53 if (lm->lm_unmount) {
54 fs_err(sdp, "telling LM to unmount\n");
55 lm->lm_unmount(sdp);
56 }
52 fs_err(sdp, "withdrawn\n"); 57 fs_err(sdp, "withdrawn\n");
53 dump_stack(); 58 dump_stack();
54 59
diff --git a/fs/inode.c b/fs/inode.c
index bb81bd515f85..29df4a297449 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
17#include <linux/hash.h> 17#include <linux/hash.h>
18#include <linux/swap.h> 18#include <linux/swap.h>
19#include <linux/security.h> 19#include <linux/security.h>
20#include <linux/ima.h>
20#include <linux/pagemap.h> 21#include <linux/pagemap.h>
21#include <linux/cdev.h> 22#include <linux/cdev.h>
22#include <linux/bootmem.h> 23#include <linux/bootmem.h>
@@ -147,13 +148,13 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
147 inode->i_cdev = NULL; 148 inode->i_cdev = NULL;
148 inode->i_rdev = 0; 149 inode->i_rdev = 0;
149 inode->dirtied_when = 0; 150 inode->dirtied_when = 0;
150 if (security_inode_alloc(inode)) { 151
151 if (inode->i_sb->s_op->destroy_inode) 152 if (security_inode_alloc(inode))
152 inode->i_sb->s_op->destroy_inode(inode); 153 goto out_free_inode;
153 else 154
154 kmem_cache_free(inode_cachep, (inode)); 155 /* allocate and initialize an i_integrity */
155 return NULL; 156 if (ima_inode_alloc(inode))
156 } 157 goto out_free_security;
157 158
158 spin_lock_init(&inode->i_lock); 159 spin_lock_init(&inode->i_lock);
159 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 160 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
@@ -189,6 +190,15 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
189 inode->i_mapping = mapping; 190 inode->i_mapping = mapping;
190 191
191 return inode; 192 return inode;
193
194out_free_security:
195 security_inode_free(inode);
196out_free_inode:
197 if (inode->i_sb->s_op->destroy_inode)
198 inode->i_sb->s_op->destroy_inode(inode);
199 else
200 kmem_cache_free(inode_cachep, (inode));
201 return NULL;
192} 202}
193EXPORT_SYMBOL(inode_init_always); 203EXPORT_SYMBOL(inode_init_always);
194 204
@@ -1290,6 +1300,40 @@ sector_t bmap(struct inode * inode, sector_t block)
1290} 1300}
1291EXPORT_SYMBOL(bmap); 1301EXPORT_SYMBOL(bmap);
1292 1302
1303/*
1304 * With relative atime, only update atime if the previous atime is
1305 * earlier than either the ctime or mtime or if at least a day has
1306 * passed since the last atime update.
1307 */
1308static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
1309 struct timespec now)
1310{
1311
1312 if (!(mnt->mnt_flags & MNT_RELATIME))
1313 return 1;
1314 /*
1315 * Is mtime younger than atime? If yes, update atime:
1316 */
1317 if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
1318 return 1;
1319 /*
1320 * Is ctime younger than atime? If yes, update atime:
1321 */
1322 if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
1323 return 1;
1324
1325 /*
1326 * Is the previous atime value older than a day? If yes,
1327 * update atime:
1328 */
1329 if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
1330 return 1;
1331 /*
1332 * Good, we can skip the atime update:
1333 */
1334 return 0;
1335}
1336
1293/** 1337/**
1294 * touch_atime - update the access time 1338 * touch_atime - update the access time
1295 * @mnt: mount the inode is accessed on 1339 * @mnt: mount the inode is accessed on
@@ -1317,17 +1361,12 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1317 goto out; 1361 goto out;
1318 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1362 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1319 goto out; 1363 goto out;
1320 if (mnt->mnt_flags & MNT_RELATIME) {
1321 /*
1322 * With relative atime, only update atime if the previous
1323 * atime is earlier than either the ctime or mtime.
1324 */
1325 if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
1326 timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
1327 goto out;
1328 }
1329 1364
1330 now = current_fs_time(inode->i_sb); 1365 now = current_fs_time(inode->i_sb);
1366
1367 if (!relatime_need_update(mnt, inode, now))
1368 goto out;
1369
1331 if (timespec_equal(&inode->i_atime, &now)) 1370 if (timespec_equal(&inode->i_atime, &now))
1332 goto out; 1371 goto out;
1333 1372
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 240ec63984cb..ac2d47e43926 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -404,10 +404,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp)
404 if (O_NONBLOCK != O_NDELAY) 404 if (O_NONBLOCK != O_NDELAY)
405 flag |= O_NDELAY; 405 flag |= O_NDELAY;
406#endif 406#endif
407 spin_lock(&filp->f_lock);
407 if (on) 408 if (on)
408 filp->f_flags |= flag; 409 filp->f_flags |= flag;
409 else 410 else
410 filp->f_flags &= ~flag; 411 filp->f_flags &= ~flag;
412 spin_unlock(&filp->f_lock);
411 return error; 413 return error;
412} 414}
413 415
@@ -425,18 +427,12 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
425 /* Did FASYNC state change ? */ 427 /* Did FASYNC state change ? */
426 if ((flag ^ filp->f_flags) & FASYNC) { 428 if ((flag ^ filp->f_flags) & FASYNC) {
427 if (filp->f_op && filp->f_op->fasync) 429 if (filp->f_op && filp->f_op->fasync)
430 /* fasync() adjusts filp->f_flags */
428 error = filp->f_op->fasync(fd, filp, on); 431 error = filp->f_op->fasync(fd, filp, on);
429 else 432 else
430 error = -ENOTTY; 433 error = -ENOTTY;
431 } 434 }
432 if (error) 435 return error < 0 ? error : 0;
433 return error;
434
435 if (on)
436 filp->f_flags |= FASYNC;
437 else
438 filp->f_flags &= ~FASYNC;
439 return error;
440} 436}
441 437
442static int ioctl_fsfreeze(struct file *filp) 438static int ioctl_fsfreeze(struct file *filp)
@@ -499,17 +495,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
499 break; 495 break;
500 496
501 case FIONBIO: 497 case FIONBIO:
502 /* BKL needed to avoid races tweaking f_flags */
503 lock_kernel();
504 error = ioctl_fionbio(filp, argp); 498 error = ioctl_fionbio(filp, argp);
505 unlock_kernel();
506 break; 499 break;
507 500
508 case FIOASYNC: 501 case FIOASYNC:
509 /* BKL needed to avoid races tweaking f_flags */
510 lock_kernel();
511 error = ioctl_fioasync(fd, filp, argp); 502 error = ioctl_fioasync(fd, filp, argp);
512 unlock_kernel();
513 break; 503 break;
514 504
515 case FIOQSIZE: 505 case FIOQSIZE:
diff --git a/fs/namei.c b/fs/namei.c
index 8937f4e78178..1928197b3874 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -24,6 +24,7 @@
24#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
25#include <linux/personality.h> 25#include <linux/personality.h>
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/ima.h>
27#include <linux/syscalls.h> 28#include <linux/syscalls.h>
28#include <linux/mount.h> 29#include <linux/mount.h>
29#include <linux/audit.h> 30#include <linux/audit.h>
@@ -850,6 +851,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
850 if (err == -EAGAIN) 851 if (err == -EAGAIN)
851 err = inode_permission(nd->path.dentry->d_inode, 852 err = inode_permission(nd->path.dentry->d_inode,
852 MAY_EXEC); 853 MAY_EXEC);
854 if (!err)
855 err = ima_path_check(&nd->path, MAY_EXEC);
853 if (err) 856 if (err)
854 break; 857 break;
855 858
@@ -1509,6 +1512,11 @@ int may_open(struct path *path, int acc_mode, int flag)
1509 error = inode_permission(inode, acc_mode); 1512 error = inode_permission(inode, acc_mode);
1510 if (error) 1513 if (error)
1511 return error; 1514 return error;
1515
1516 error = ima_path_check(path,
1517 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
1518 if (error)
1519 return error;
1512 /* 1520 /*
1513 * An append-only file must be opened in append mode for writing. 1521 * An append-only file must be opened in append mode for writing.
1514 */ 1522 */
diff --git a/fs/namespace.c b/fs/namespace.c
index 06f8e63f6cb1..f0e753097353 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -780,6 +780,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
780 { MNT_NOATIME, ",noatime" }, 780 { MNT_NOATIME, ",noatime" },
781 { MNT_NODIRATIME, ",nodiratime" }, 781 { MNT_NODIRATIME, ",nodiratime" },
782 { MNT_RELATIME, ",relatime" }, 782 { MNT_RELATIME, ",relatime" },
783 { MNT_STRICTATIME, ",strictatime" },
783 { 0, NULL } 784 { 0, NULL }
784 }; 785 };
785 const struct proc_fs_info *fs_infop; 786 const struct proc_fs_info *fs_infop;
@@ -1919,6 +1920,9 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1919 if (data_page) 1920 if (data_page)
1920 ((char *)data_page)[PAGE_SIZE - 1] = 0; 1921 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1921 1922
1923 /* Default to relatime */
1924 mnt_flags |= MNT_RELATIME;
1925
1922 /* Separate the per-mountpoint flags */ 1926 /* Separate the per-mountpoint flags */
1923 if (flags & MS_NOSUID) 1927 if (flags & MS_NOSUID)
1924 mnt_flags |= MNT_NOSUID; 1928 mnt_flags |= MNT_NOSUID;
@@ -1930,13 +1934,14 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1930 mnt_flags |= MNT_NOATIME; 1934 mnt_flags |= MNT_NOATIME;
1931 if (flags & MS_NODIRATIME) 1935 if (flags & MS_NODIRATIME)
1932 mnt_flags |= MNT_NODIRATIME; 1936 mnt_flags |= MNT_NODIRATIME;
1933 if (flags & MS_RELATIME) 1937 if (flags & MS_STRICTATIME)
1934 mnt_flags |= MNT_RELATIME; 1938 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
1935 if (flags & MS_RDONLY) 1939 if (flags & MS_RDONLY)
1936 mnt_flags |= MNT_READONLY; 1940 mnt_flags |= MNT_READONLY;
1937 1941
1938 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1942 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1939 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); 1943 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1944 MS_STRICTATIME);
1940 1945
1941 /* ... and get the mountpoint */ 1946 /* ... and get the mountpoint */
1942 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); 1947 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ad38fc9e5816..78376b6c0236 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
998 998
999 if (!EX_ISSYNC(exp)) 999 if (!EX_ISSYNC(exp))
1000 stable = 0; 1000 stable = 0;
1001 if (stable && !EX_WGATHER(exp)) 1001 if (stable && !EX_WGATHER(exp)) {
1002 spin_lock(&file->f_lock);
1002 file->f_flags |= O_SYNC; 1003 file->f_flags |= O_SYNC;
1004 spin_unlock(&file->f_lock);
1005 }
1003 1006
1004 /* Write the data. */ 1007 /* Write the data. */
1005 oldfs = get_fs(); set_fs(KERNEL_DS); 1008 oldfs = get_fs(); set_fs(KERNEL_DS);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f4..38e337d51ced 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
400 pdev->devt = devt; 400 pdev->devt = devt;
401 401
402 /* delay uevent until 'holders' subdir is created */ 402 /* delay uevent until 'holders' subdir is created */
403 pdev->uevent_suppress = 1; 403 dev_set_uevent_suppress(pdev, 1);
404 err = device_add(pdev); 404 err = device_add(pdev);
405 if (err) 405 if (err)
406 goto out_put; 406 goto out_put;
@@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
410 if (!p->holder_dir) 410 if (!p->holder_dir)
411 goto out_del; 411 goto out_del;
412 412
413 pdev->uevent_suppress = 0; 413 dev_set_uevent_suppress(pdev, 0);
414 if (flags & ADDPART_FLAG_WHOLEDISK) { 414 if (flags & ADDPART_FLAG_WHOLEDISK) {
415 err = device_create_file(pdev, &dev_attr_whole_disk); 415 err = device_create_file(pdev, &dev_attr_whole_disk);
416 if (err) 416 if (err)
@@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
422 rcu_assign_pointer(ptbl->part[partno], p); 422 rcu_assign_pointer(ptbl->part[partno], p);
423 423
424 /* suppress uevent if the disk supresses it */ 424 /* suppress uevent if the disk supresses it */
425 if (!ddev->uevent_suppress) 425 if (!dev_get_uevent_suppress(pdev))
426 kobject_uevent(&pdev->kobj, KOBJ_ADD); 426 kobject_uevent(&pdev->kobj, KOBJ_ADD);
427 427
428 return p; 428 return p;
@@ -455,7 +455,7 @@ void register_disk(struct gendisk *disk)
455 dev_set_name(ddev, disk->disk_name); 455 dev_set_name(ddev, disk->disk_name);
456 456
457 /* delay uevents, until we scanned partition table */ 457 /* delay uevents, until we scanned partition table */
458 ddev->uevent_suppress = 1; 458 dev_set_uevent_suppress(ddev, 1);
459 459
460 if (device_add(ddev)) 460 if (device_add(ddev))
461 return; 461 return;
@@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk)
490 490
491exit: 491exit:
492 /* announce disk after possible partitions are created */ 492 /* announce disk after possible partitions are created */
493 ddev->uevent_suppress = 0; 493 dev_set_uevent_suppress(ddev, 0);
494 kobject_uevent(&ddev->kobj, KOBJ_ADD); 494 kobject_uevent(&ddev->kobj, KOBJ_ADD);
495 495
496 /* announce possible partitions */ 496 /* announce possible partitions */
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 1e064c4a4f86..46297683cd34 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -21,20 +21,38 @@
21 * compute the block number from a 21 * compute the block number from a
22 * cyl-cyl-head-head structure 22 * cyl-cyl-head-head structure
23 */ 23 */
24static inline int 24static sector_t
25cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) { 25cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) {
26 return ptr->cc * geo->heads * geo->sectors + 26
27 ptr->hh * geo->sectors; 27 sector_t cyl;
28 __u16 head;
29
30 /*decode cylinder and heads for large volumes */
31 cyl = ptr->hh & 0xFFF0;
32 cyl <<= 12;
33 cyl |= ptr->cc;
34 head = ptr->hh & 0x000F;
35 return cyl * geo->heads * geo->sectors +
36 head * geo->sectors;
28} 37}
29 38
30/* 39/*
31 * compute the block number from a 40 * compute the block number from a
32 * cyl-cyl-head-head-block structure 41 * cyl-cyl-head-head-block structure
33 */ 42 */
34static inline int 43static sector_t
35cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { 44cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
36 return ptr->cc * geo->heads * geo->sectors + 45
37 ptr->hh * geo->sectors + 46 sector_t cyl;
47 __u16 head;
48
49 /*decode cylinder and heads for large volumes */
50 cyl = ptr->hh & 0xFFF0;
51 cyl <<= 12;
52 cyl |= ptr->cc;
53 head = ptr->hh & 0x000F;
54 return cyl * geo->heads * geo->sectors +
55 head * geo->sectors +
38 ptr->b; 56 ptr->b;
39} 57}
40 58
@@ -43,14 +61,15 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
43int 61int
44ibm_partition(struct parsed_partitions *state, struct block_device *bdev) 62ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
45{ 63{
46 int blocksize, offset, size,res; 64 int blocksize, res;
47 loff_t i_size; 65 loff_t i_size, offset, size, fmt_size;
48 dasd_information2_t *info; 66 dasd_information2_t *info;
49 struct hd_geometry *geo; 67 struct hd_geometry *geo;
50 char type[5] = {0,}; 68 char type[5] = {0,};
51 char name[7] = {0,}; 69 char name[7] = {0,};
52 union label_t { 70 union label_t {
53 struct vtoc_volume_label vol; 71 struct vtoc_volume_label_cdl vol;
72 struct vtoc_volume_label_ldl lnx;
54 struct vtoc_cms_label cms; 73 struct vtoc_cms_label cms;
55 } *label; 74 } *label;
56 unsigned char *data; 75 unsigned char *data;
@@ -85,14 +104,16 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
85 if (data == NULL) 104 if (data == NULL)
86 goto out_readerr; 105 goto out_readerr;
87 106
88 strncpy (type, data, 4);
89 if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD")))
90 strncpy(name, data + 8, 6);
91 else
92 strncpy(name, data + 4, 6);
93 memcpy(label, data, sizeof(union label_t)); 107 memcpy(label, data, sizeof(union label_t));
94 put_dev_sector(sect); 108 put_dev_sector(sect);
95 109
110 if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
111 strncpy(type, label->vol.vollbl, 4);
112 strncpy(name, label->vol.volid, 6);
113 } else {
114 strncpy(type, label->lnx.vollbl, 4);
115 strncpy(name, label->lnx.volid, 6);
116 }
96 EBCASC(type, 4); 117 EBCASC(type, 4);
97 EBCASC(name, 6); 118 EBCASC(name, 6);
98 119
@@ -110,36 +131,54 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
110 /* 131 /*
111 * VM style CMS1 labeled disk 132 * VM style CMS1 labeled disk
112 */ 133 */
134 blocksize = label->cms.block_size;
113 if (label->cms.disk_offset != 0) { 135 if (label->cms.disk_offset != 0) {
114 printk("CMS1/%8s(MDSK):", name); 136 printk("CMS1/%8s(MDSK):", name);
115 /* disk is reserved minidisk */ 137 /* disk is reserved minidisk */
116 blocksize = label->cms.block_size;
117 offset = label->cms.disk_offset; 138 offset = label->cms.disk_offset;
118 size = (label->cms.block_count - 1) 139 size = (label->cms.block_count - 1)
119 * (blocksize >> 9); 140 * (blocksize >> 9);
120 } else { 141 } else {
121 printk("CMS1/%8s:", name); 142 printk("CMS1/%8s:", name);
122 offset = (info->label_block + 1); 143 offset = (info->label_block + 1);
123 size = i_size >> 9; 144 size = label->cms.block_count
145 * (blocksize >> 9);
124 } 146 }
147 put_partition(state, 1, offset*(blocksize >> 9),
148 size-offset*(blocksize >> 9));
125 } else { 149 } else {
126 /* 150 if (strncmp(type, "LNX1", 4) == 0) {
127 * Old style LNX1 or unlabeled disk 151 printk("LNX1/%8s:", name);
128 */ 152 if (label->lnx.ldl_version == 0xf2) {
129 if (strncmp(type, "LNX1", 4) == 0) 153 fmt_size = label->lnx.formatted_blocks
130 printk ("LNX1/%8s:", name); 154 * (blocksize >> 9);
131 else 155 } else if (!strcmp(info->type, "ECKD")) {
156 /* formated w/o large volume support */
157 fmt_size = geo->cylinders * geo->heads
158 * geo->sectors * (blocksize >> 9);
159 } else {
160 /* old label and no usable disk geometry
161 * (e.g. DIAG) */
162 fmt_size = i_size >> 9;
163 }
164 size = i_size >> 9;
165 if (fmt_size < size)
166 size = fmt_size;
167 offset = (info->label_block + 1);
168 } else {
169 /* unlabeled disk */
132 printk("(nonl)"); 170 printk("(nonl)");
133 offset = (info->label_block + 1); 171 size = i_size >> 9;
134 size = i_size >> 9; 172 offset = (info->label_block + 1);
135 } 173 }
136 put_partition(state, 1, offset*(blocksize >> 9), 174 put_partition(state, 1, offset*(blocksize >> 9),
137 size-offset*(blocksize >> 9)); 175 size-offset*(blocksize >> 9));
176 }
138 } else if (info->format == DASD_FORMAT_CDL) { 177 } else if (info->format == DASD_FORMAT_CDL) {
139 /* 178 /*
140 * New style CDL formatted disk 179 * New style CDL formatted disk
141 */ 180 */
142 unsigned int blk; 181 sector_t blk;
143 int counter; 182 int counter;
144 183
145 /* 184 /*
@@ -166,7 +205,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
166 /* skip FMT4 / FMT5 / FMT7 labels */ 205 /* skip FMT4 / FMT5 / FMT7 labels */
167 if (f1.DS1FMTID == _ascebc['4'] 206 if (f1.DS1FMTID == _ascebc['4']
168 || f1.DS1FMTID == _ascebc['5'] 207 || f1.DS1FMTID == _ascebc['5']
169 || f1.DS1FMTID == _ascebc['7']) { 208 || f1.DS1FMTID == _ascebc['7']
209 || f1.DS1FMTID == _ascebc['9']) {
170 blk++; 210 blk++;
171 data = read_dev_sector(bdev, blk * 211 data = read_dev_sector(bdev, blk *
172 (blocksize/512), 212 (blocksize/512),
@@ -174,8 +214,9 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
174 continue; 214 continue;
175 } 215 }
176 216
177 /* only FMT1 valid at this point */ 217 /* only FMT1 and 8 labels valid at this point */
178 if (f1.DS1FMTID != _ascebc['1']) 218 if (f1.DS1FMTID != _ascebc['1'] &&
219 f1.DS1FMTID != _ascebc['8'])
179 break; 220 break;
180 221
181 /* OK, we got valid partition data */ 222 /* OK, we got valid partition data */
diff --git a/fs/pipe.c b/fs/pipe.c
index 14f502b89cf5..94ad15967cf9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -667,10 +667,7 @@ pipe_read_fasync(int fd, struct file *filp, int on)
667 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); 667 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
668 mutex_unlock(&inode->i_mutex); 668 mutex_unlock(&inode->i_mutex);
669 669
670 if (retval < 0) 670 return retval;
671 return retval;
672
673 return 0;
674} 671}
675 672
676 673
@@ -684,10 +681,7 @@ pipe_write_fasync(int fd, struct file *filp, int on)
684 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); 681 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
685 mutex_unlock(&inode->i_mutex); 682 mutex_unlock(&inode->i_mutex);
686 683
687 if (retval < 0) 684 return retval;
688 return retval;
689
690 return 0;
691} 685}
692 686
693 687
@@ -706,11 +700,7 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
706 fasync_helper(-1, filp, 0, &pipe->fasync_readers); 700 fasync_helper(-1, filp, 0, &pipe->fasync_readers);
707 } 701 }
708 mutex_unlock(&inode->i_mutex); 702 mutex_unlock(&inode->i_mutex);
709 703 return retval;
710 if (retval < 0)
711 return retval;
712
713 return 0;
714} 704}
715 705
716 706
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a1bd5eabbe50..eb938015bd91 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1124,10 +1124,7 @@ static void send_warning(const struct dquot *dquot, const char warntype)
1124 goto attr_err_out; 1124 goto attr_err_out;
1125 genlmsg_end(skb, msg_head); 1125 genlmsg_end(skb, msg_head);
1126 1126
1127 ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); 1127 genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
1128 if (ret < 0 && ret != -ESRCH)
1129 printk(KERN_ERR
1130 "VFS: Failed to send notification message: %d\n", ret);
1131 return; 1128 return;
1132attr_err_out: 1129attr_err_out:
1133 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); 1130 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
diff --git a/fs/super.c b/fs/super.c
index 0f9d17f2c754..49d0bd32a5a7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -674,7 +674,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
674 return 0; 674 return 0;
675} 675}
676 676
677static void do_emergency_remount(unsigned long foo) 677static void do_emergency_remount(struct work_struct *work)
678{ 678{
679 struct super_block *sb; 679 struct super_block *sb;
680 680
@@ -697,12 +697,19 @@ static void do_emergency_remount(unsigned long foo)
697 spin_lock(&sb_lock); 697 spin_lock(&sb_lock);
698 } 698 }
699 spin_unlock(&sb_lock); 699 spin_unlock(&sb_lock);
700 kfree(work);
700 printk("Emergency Remount complete\n"); 701 printk("Emergency Remount complete\n");
701} 702}
702 703
703void emergency_remount(void) 704void emergency_remount(void)
704{ 705{
705 pdflush_operation(do_emergency_remount, 0); 706 struct work_struct *work;
707
708 work = kmalloc(sizeof(*work), GFP_ATOMIC);
709 if (work) {
710 INIT_WORK(work, do_emergency_remount);
711 schedule_work(work);
712 }
706} 713}
707 714
708/* 715/*
diff --git a/fs/sync.c b/fs/sync.c
index ef36bc921bf3..7abc65fbf21d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,9 +42,21 @@ SYSCALL_DEFINE0(sync)
42 return 0; 42 return 0;
43} 43}
44 44
45static void do_sync_work(struct work_struct *work)
46{
47 do_sync(0);
48 kfree(work);
49}
50
45void emergency_sync(void) 51void emergency_sync(void)
46{ 52{
47 pdflush_operation(do_sync, 0); 53 struct work_struct *work;
54
55 work = kmalloc(sizeof(*work), GFP_ATOMIC);
56 if (work) {
57 INIT_WORK(work, do_sync_work);
58 schedule_work(work);
59 }
48} 60}
49 61
50/* 62/*
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index f2c478c3424e..07703d3ff4a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -21,15 +21,28 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/mm.h>
24 25
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
26 27
27#include "sysfs.h" 28#include "sysfs.h"
28 29
30/*
31 * There's one bin_buffer for each open file.
32 *
33 * filp->private_data points to bin_buffer and
34 * sysfs_dirent->s_bin_attr.buffers points to a list of the bin_buffers
35 * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
36 */
37static DEFINE_MUTEX(sysfs_bin_lock);
38
29struct bin_buffer { 39struct bin_buffer {
30 struct mutex mutex; 40 struct mutex mutex;
31 void *buffer; 41 void *buffer;
32 int mmapped; 42 int mmapped;
43 struct vm_operations_struct *vm_ops;
44 struct file *file;
45 struct hlist_node list;
33}; 46};
34 47
35static int 48static int
@@ -168,6 +181,175 @@ out_free:
168 return count; 181 return count;
169} 182}
170 183
184static void bin_vma_open(struct vm_area_struct *vma)
185{
186 struct file *file = vma->vm_file;
187 struct bin_buffer *bb = file->private_data;
188 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
189
190 if (!bb->vm_ops || !bb->vm_ops->open)
191 return;
192
193 if (!sysfs_get_active_two(attr_sd))
194 return;
195
196 bb->vm_ops->open(vma);
197
198 sysfs_put_active_two(attr_sd);
199}
200
201static void bin_vma_close(struct vm_area_struct *vma)
202{
203 struct file *file = vma->vm_file;
204 struct bin_buffer *bb = file->private_data;
205 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
206
207 if (!bb->vm_ops || !bb->vm_ops->close)
208 return;
209
210 if (!sysfs_get_active_two(attr_sd))
211 return;
212
213 bb->vm_ops->close(vma);
214
215 sysfs_put_active_two(attr_sd);
216}
217
218static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
219{
220 struct file *file = vma->vm_file;
221 struct bin_buffer *bb = file->private_data;
222 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
223 int ret;
224
225 if (!bb->vm_ops || !bb->vm_ops->fault)
226 return VM_FAULT_SIGBUS;
227
228 if (!sysfs_get_active_two(attr_sd))
229 return VM_FAULT_SIGBUS;
230
231 ret = bb->vm_ops->fault(vma, vmf);
232
233 sysfs_put_active_two(attr_sd);
234 return ret;
235}
236
237static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
238{
239 struct file *file = vma->vm_file;
240 struct bin_buffer *bb = file->private_data;
241 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
242 int ret;
243
244 if (!bb->vm_ops)
245 return -EINVAL;
246
247 if (!bb->vm_ops->page_mkwrite)
248 return 0;
249
250 if (!sysfs_get_active_two(attr_sd))
251 return -EINVAL;
252
253 ret = bb->vm_ops->page_mkwrite(vma, page);
254
255 sysfs_put_active_two(attr_sd);
256 return ret;
257}
258
259static int bin_access(struct vm_area_struct *vma, unsigned long addr,
260 void *buf, int len, int write)
261{
262 struct file *file = vma->vm_file;
263 struct bin_buffer *bb = file->private_data;
264 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
265 int ret;
266
267 if (!bb->vm_ops || !bb->vm_ops->access)
268 return -EINVAL;
269
270 if (!sysfs_get_active_two(attr_sd))
271 return -EINVAL;
272
273 ret = bb->vm_ops->access(vma, addr, buf, len, write);
274
275 sysfs_put_active_two(attr_sd);
276 return ret;
277}
278
279#ifdef CONFIG_NUMA
280static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
281{
282 struct file *file = vma->vm_file;
283 struct bin_buffer *bb = file->private_data;
284 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
285 int ret;
286
287 if (!bb->vm_ops || !bb->vm_ops->set_policy)
288 return 0;
289
290 if (!sysfs_get_active_two(attr_sd))
291 return -EINVAL;
292
293 ret = bb->vm_ops->set_policy(vma, new);
294
295 sysfs_put_active_two(attr_sd);
296 return ret;
297}
298
299static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
300 unsigned long addr)
301{
302 struct file *file = vma->vm_file;
303 struct bin_buffer *bb = file->private_data;
304 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
305 struct mempolicy *pol;
306
307 if (!bb->vm_ops || !bb->vm_ops->get_policy)
308 return vma->vm_policy;
309
310 if (!sysfs_get_active_two(attr_sd))
311 return vma->vm_policy;
312
313 pol = bb->vm_ops->get_policy(vma, addr);
314
315 sysfs_put_active_two(attr_sd);
316 return pol;
317}
318
319static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
320 const nodemask_t *to, unsigned long flags)
321{
322 struct file *file = vma->vm_file;
323 struct bin_buffer *bb = file->private_data;
324 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
325 int ret;
326
327 if (!bb->vm_ops || !bb->vm_ops->migrate)
328 return 0;
329
330 if (!sysfs_get_active_two(attr_sd))
331 return 0;
332
333 ret = bb->vm_ops->migrate(vma, from, to, flags);
334
335 sysfs_put_active_two(attr_sd);
336 return ret;
337}
338#endif
339
340static struct vm_operations_struct bin_vm_ops = {
341 .open = bin_vma_open,
342 .close = bin_vma_close,
343 .fault = bin_fault,
344 .page_mkwrite = bin_page_mkwrite,
345 .access = bin_access,
346#ifdef CONFIG_NUMA
347 .set_policy = bin_set_policy,
348 .get_policy = bin_get_policy,
349 .migrate = bin_migrate,
350#endif
351};
352
171static int mmap(struct file *file, struct vm_area_struct *vma) 353static int mmap(struct file *file, struct vm_area_struct *vma)
172{ 354{
173 struct bin_buffer *bb = file->private_data; 355 struct bin_buffer *bb = file->private_data;
@@ -179,18 +361,37 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
179 mutex_lock(&bb->mutex); 361 mutex_lock(&bb->mutex);
180 362
181 /* need attr_sd for attr, its parent for kobj */ 363 /* need attr_sd for attr, its parent for kobj */
364 rc = -ENODEV;
182 if (!sysfs_get_active_two(attr_sd)) 365 if (!sysfs_get_active_two(attr_sd))
183 return -ENODEV; 366 goto out_unlock;
184 367
185 rc = -EINVAL; 368 rc = -EINVAL;
186 if (attr->mmap) 369 if (!attr->mmap)
187 rc = attr->mmap(kobj, attr, vma); 370 goto out_put;
371
372 rc = attr->mmap(kobj, attr, vma);
373 if (rc)
374 goto out_put;
188 375
189 if (rc == 0 && !bb->mmapped) 376 /*
190 bb->mmapped = 1; 377 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
191 else 378 * to satisfy versions of X which crash if the mmap fails: that
192 sysfs_put_active_two(attr_sd); 379 * substitutes a new vm_file, and we don't then want bin_vm_ops.
380 */
381 if (vma->vm_file != file)
382 goto out_put;
193 383
384 rc = -EINVAL;
385 if (bb->mmapped && bb->vm_ops != vma->vm_ops)
386 goto out_put;
387
388 rc = 0;
389 bb->mmapped = 1;
390 bb->vm_ops = vma->vm_ops;
391 vma->vm_ops = &bin_vm_ops;
392out_put:
393 sysfs_put_active_two(attr_sd);
394out_unlock:
194 mutex_unlock(&bb->mutex); 395 mutex_unlock(&bb->mutex);
195 396
196 return rc; 397 return rc;
@@ -223,8 +424,13 @@ static int open(struct inode * inode, struct file * file)
223 goto err_out; 424 goto err_out;
224 425
225 mutex_init(&bb->mutex); 426 mutex_init(&bb->mutex);
427 bb->file = file;
226 file->private_data = bb; 428 file->private_data = bb;
227 429
430 mutex_lock(&sysfs_bin_lock);
431 hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
432 mutex_unlock(&sysfs_bin_lock);
433
228 /* open succeeded, put active references */ 434 /* open succeeded, put active references */
229 sysfs_put_active_two(attr_sd); 435 sysfs_put_active_two(attr_sd);
230 return 0; 436 return 0;
@@ -237,11 +443,12 @@ static int open(struct inode * inode, struct file * file)
237 443
238static int release(struct inode * inode, struct file * file) 444static int release(struct inode * inode, struct file * file)
239{ 445{
240 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
241 struct bin_buffer *bb = file->private_data; 446 struct bin_buffer *bb = file->private_data;
242 447
243 if (bb->mmapped) 448 mutex_lock(&sysfs_bin_lock);
244 sysfs_put_active_two(attr_sd); 449 hlist_del(&bb->list);
450 mutex_unlock(&sysfs_bin_lock);
451
245 kfree(bb->buffer); 452 kfree(bb->buffer);
246 kfree(bb); 453 kfree(bb);
247 return 0; 454 return 0;
@@ -256,6 +463,26 @@ const struct file_operations bin_fops = {
256 .release = release, 463 .release = release,
257}; 464};
258 465
466
467void unmap_bin_file(struct sysfs_dirent *attr_sd)
468{
469 struct bin_buffer *bb;
470 struct hlist_node *tmp;
471
472 if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
473 return;
474
475 mutex_lock(&sysfs_bin_lock);
476
477 hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
478 struct inode *inode = bb->file->f_path.dentry->d_inode;
479
480 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
481 }
482
483 mutex_unlock(&sysfs_bin_lock);
484}
485
259/** 486/**
260 * sysfs_create_bin_file - create binary file for object. 487 * sysfs_create_bin_file - create binary file for object.
261 * @kobj: object. 488 * @kobj: object.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 82d3b79d0e08..66aeb4fff0c3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -434,6 +434,26 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
434} 434}
435 435
436/** 436/**
437 * sysfs_pathname - return full path to sysfs dirent
438 * @sd: sysfs_dirent whose path we want
439 * @path: caller allocated buffer
440 *
441 * Gives the name "/" to the sysfs_root entry; any path returned
442 * is relative to wherever sysfs is mounted.
443 *
444 * XXX: does no error checking on @path size
445 */
446static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
447{
448 if (sd->s_parent) {
449 sysfs_pathname(sd->s_parent, path);
450 strcat(path, "/");
451 }
452 strcat(path, sd->s_name);
453 return path;
454}
455
456/**
437 * sysfs_add_one - add sysfs_dirent to parent 457 * sysfs_add_one - add sysfs_dirent to parent
438 * @acxt: addrm context to use 458 * @acxt: addrm context to use
439 * @sd: sysfs_dirent to be added 459 * @sd: sysfs_dirent to be added
@@ -458,8 +478,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
458 int ret; 478 int ret;
459 479
460 ret = __sysfs_add_one(acxt, sd); 480 ret = __sysfs_add_one(acxt, sd);
461 WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' " 481 if (ret == -EEXIST) {
462 "can not be created\n", sd->s_name); 482 char *path = kzalloc(PATH_MAX, GFP_KERNEL);
483 WARN(1, KERN_WARNING
484 "sysfs: cannot create duplicate filename '%s'\n",
485 (path == NULL) ? sd->s_name :
486 strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
487 sd->s_name));
488 kfree(path);
489 }
490
463 return ret; 491 return ret;
464} 492}
465 493
@@ -581,6 +609,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
581 609
582 sysfs_drop_dentry(sd); 610 sysfs_drop_dentry(sd);
583 sysfs_deactivate(sd); 611 sysfs_deactivate(sd);
612 unmap_bin_file(sd);
584 sysfs_put(sd); 613 sysfs_put(sd);
585 } 614 }
586} 615}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1f4a3f877262..289c43a47263 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -659,13 +659,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
659EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); 659EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
660 660
661struct sysfs_schedule_callback_struct { 661struct sysfs_schedule_callback_struct {
662 struct kobject *kobj; 662 struct list_head workq_list;
663 struct kobject *kobj;
663 void (*func)(void *); 664 void (*func)(void *);
664 void *data; 665 void *data;
665 struct module *owner; 666 struct module *owner;
666 struct work_struct work; 667 struct work_struct work;
667}; 668};
668 669
670static DEFINE_MUTEX(sysfs_workq_mutex);
671static LIST_HEAD(sysfs_workq);
669static void sysfs_schedule_callback_work(struct work_struct *work) 672static void sysfs_schedule_callback_work(struct work_struct *work)
670{ 673{
671 struct sysfs_schedule_callback_struct *ss = container_of(work, 674 struct sysfs_schedule_callback_struct *ss = container_of(work,
@@ -674,6 +677,9 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
674 (ss->func)(ss->data); 677 (ss->func)(ss->data);
675 kobject_put(ss->kobj); 678 kobject_put(ss->kobj);
676 module_put(ss->owner); 679 module_put(ss->owner);
680 mutex_lock(&sysfs_workq_mutex);
681 list_del(&ss->workq_list);
682 mutex_unlock(&sysfs_workq_mutex);
677 kfree(ss); 683 kfree(ss);
678} 684}
679 685
@@ -695,15 +701,25 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
695 * until @func returns. 701 * until @func returns.
696 * 702 *
697 * Returns 0 if the request was submitted, -ENOMEM if storage could not 703 * Returns 0 if the request was submitted, -ENOMEM if storage could not
698 * be allocated, -ENODEV if a reference to @owner isn't available. 704 * be allocated, -ENODEV if a reference to @owner isn't available,
705 * -EAGAIN if a callback has already been scheduled for @kobj.
699 */ 706 */
700int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), 707int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
701 void *data, struct module *owner) 708 void *data, struct module *owner)
702{ 709{
703 struct sysfs_schedule_callback_struct *ss; 710 struct sysfs_schedule_callback_struct *ss, *tmp;
704 711
705 if (!try_module_get(owner)) 712 if (!try_module_get(owner))
706 return -ENODEV; 713 return -ENODEV;
714
715 mutex_lock(&sysfs_workq_mutex);
716 list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
717 if (ss->kobj == kobj) {
718 mutex_unlock(&sysfs_workq_mutex);
719 return -EAGAIN;
720 }
721 mutex_unlock(&sysfs_workq_mutex);
722
707 ss = kmalloc(sizeof(*ss), GFP_KERNEL); 723 ss = kmalloc(sizeof(*ss), GFP_KERNEL);
708 if (!ss) { 724 if (!ss) {
709 module_put(owner); 725 module_put(owner);
@@ -715,6 +731,10 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
715 ss->data = data; 731 ss->data = data;
716 ss->owner = owner; 732 ss->owner = owner;
717 INIT_WORK(&ss->work, sysfs_schedule_callback_work); 733 INIT_WORK(&ss->work, sysfs_schedule_callback_work);
734 INIT_LIST_HEAD(&ss->workq_list);
735 mutex_lock(&sysfs_workq_mutex);
736 list_add_tail(&ss->workq_list, &sysfs_workq);
737 mutex_unlock(&sysfs_workq_mutex);
718 schedule_work(&ss->work); 738 schedule_work(&ss->work);
719 return 0; 739 return 0;
720} 740}
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index dfa3d94cfc74..555f0ff988df 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -147,6 +147,7 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
147{ 147{
148 struct bin_attribute *bin_attr; 148 struct bin_attribute *bin_attr;
149 149
150 inode->i_private = sysfs_get(sd);
150 inode->i_mapping->a_ops = &sysfs_aops; 151 inode->i_mapping->a_ops = &sysfs_aops;
151 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 152 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
152 inode->i_op = &sysfs_inode_operations; 153 inode->i_op = &sysfs_inode_operations;
@@ -214,6 +215,22 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
214 return inode; 215 return inode;
215} 216}
216 217
218/*
219 * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
220 * To prevent the sysfs inode numbers from being freed prematurely we take a
221 * reference to sysfs_dirent from the sysfs inode. A
222 * super_operations.delete_inode() implementation is needed to drop that
223 * reference upon inode destruction.
224 */
225void sysfs_delete_inode(struct inode *inode)
226{
227 struct sysfs_dirent *sd = inode->i_private;
228
229 truncate_inode_pages(&inode->i_data, 0);
230 clear_inode(inode);
231 sysfs_put(sd);
232}
233
217int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) 234int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
218{ 235{
219 struct sysfs_addrm_cxt acxt; 236 struct sysfs_addrm_cxt acxt;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index ab343e371d64..49749955ccaf 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -17,11 +17,10 @@
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/magic.h>
20 21
21#include "sysfs.h" 22#include "sysfs.h"
22 23
23/* Random magic number */
24#define SYSFS_MAGIC 0x62656572
25 24
26static struct vfsmount *sysfs_mount; 25static struct vfsmount *sysfs_mount;
27struct super_block * sysfs_sb = NULL; 26struct super_block * sysfs_sb = NULL;
@@ -30,6 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
30static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
31 .statfs = simple_statfs, 30 .statfs = simple_statfs,
32 .drop_inode = generic_delete_inode, 31 .drop_inode = generic_delete_inode,
32 .delete_inode = sysfs_delete_inode,
33}; 33};
34 34
35struct sysfs_dirent sysfs_root = { 35struct sysfs_dirent sysfs_root = {
@@ -53,7 +53,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
53 sysfs_sb = sb; 53 sysfs_sb = sb;
54 54
55 /* get root inode, initialize and unlock it */ 55 /* get root inode, initialize and unlock it */
56 mutex_lock(&sysfs_mutex);
56 inode = sysfs_get_inode(&sysfs_root); 57 inode = sysfs_get_inode(&sysfs_root);
58 mutex_unlock(&sysfs_mutex);
57 if (!inode) { 59 if (!inode) {
58 pr_debug("sysfs: could not get root inode\n"); 60 pr_debug("sysfs: could not get root inode\n");
59 return -ENOMEM; 61 return -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 93c6d6b27c4d..3fa0d98481e2 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -28,6 +28,7 @@ struct sysfs_elem_attr {
28 28
29struct sysfs_elem_bin_attr { 29struct sysfs_elem_bin_attr {
30 struct bin_attribute *bin_attr; 30 struct bin_attribute *bin_attr;
31 struct hlist_head buffers;
31}; 32};
32 33
33/* 34/*
@@ -145,6 +146,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
145 * inode.c 146 * inode.c
146 */ 147 */
147struct inode *sysfs_get_inode(struct sysfs_dirent *sd); 148struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
149void sysfs_delete_inode(struct inode *inode);
148int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 150int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
149int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 151int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
150int sysfs_inode_init(void); 152int sysfs_inode_init(void);
@@ -163,6 +165,7 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
163 * bin.c 165 * bin.c
164 */ 166 */
165extern const struct file_operations bin_fops; 167extern const struct file_operations bin_fops;
168void unmap_bin_file(struct sysfs_dirent *attr_sd);
166 169
167/* 170/*
168 * symlink.c 171 * symlink.c