aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bio-integrity.c29
-rw-r--r--fs/bio.c297
-rw-r--r--fs/block_dev.c182
-rw-r--r--fs/cifs/CHANGES5
-rw-r--r--fs/cifs/README14
-rw-r--r--fs/cifs/cifsencrypt.c1
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/dcache.c10
-rw-r--r--fs/exec.c2
-rw-r--r--fs/fat/fatent.c14
-rw-r--r--fs/gfs2/glock.c15
-rw-r--r--fs/gfs2/glock.h1
-rw-r--r--fs/gfs2/incore.h38
-rw-r--r--fs/gfs2/inode.c159
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/log.c21
-rw-r--r--fs/gfs2/mount.c7
-rw-r--r--fs/gfs2/ops_address.c18
-rw-r--r--fs/gfs2/ops_file.c16
-rw-r--r--fs/gfs2/ops_fstype.c578
-rw-r--r--fs/gfs2/ops_inode.c127
-rw-r--r--fs/gfs2/ops_super.c108
-rw-r--r--fs/gfs2/super.c340
-rw-r--r--fs/gfs2/super.h6
-rw-r--r--fs/gfs2/sys.c11
-rw-r--r--fs/inotify_user.c27
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/nfsd/nfs4acl.c2
-rw-r--r--fs/nfsd/nfs4proc.c12
-rw-r--r--fs/ntfs/usnjrnl.h4
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/partitions/check.c272
-rw-r--r--fs/partitions/check.h4
-rw-r--r--fs/proc/array.c59
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/proc_misc.c7
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/splice.c3
-rw-r--r--fs/ubifs/budget.c114
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--fs/ubifs/dir.c3
-rw-r--r--fs/ubifs/file.c20
-rw-r--r--fs/ubifs/find.c19
-rw-r--r--fs/ubifs/gc.c20
-rw-r--r--fs/ubifs/misc.h49
-rw-r--r--fs/ubifs/super.c25
-rw-r--r--fs/ubifs/tnc.c116
-rw-r--r--fs/ubifs/ubifs-media.h2
-rw-r--r--fs/ubifs/ubifs.h14
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/ialloc.c44
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c20
-rw-r--r--fs/xfs/xfs_buf_item.c44
-rw-r--r--fs/xfs/xfs_dfrag.c9
-rw-r--r--fs/xfs/xfs_inode.c94
-rw-r--r--fs/xfs/xfs_log.c67
-rw-r--r--fs/xfs/xfs_log_priv.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c26
64 files changed, 1624 insertions, 1497 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c95295c65045..e83aa5ebe861 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
626 return NULL; 626 return NULL;
627 627
628error: 628error:
629 if (fid) 629 p9_client_clunk(fid);
630 p9_client_clunk(fid);
631 630
632 return ERR_PTR(result); 631 return ERR_PTR(result);
633} 632}
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 87ee5ccee348..ed8feb052df9 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
125 inode->i_ino); 125 inode->i_ino);
126 if (err) { 126 if (err) {
127 inode_dec_link_count(inode); 127 inode_dec_link_count(inode);
128 iput(inode);
129 mutex_unlock(&info->bfs_lock); 128 mutex_unlock(&info->bfs_lock);
129 iput(inode);
130 return err; 130 return err;
131 } 131 }
132 mutex_unlock(&info->bfs_lock); 132 mutex_unlock(&info->bfs_lock);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c3e174b35fe6..19caf7c962ac 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
107 BUG_ON(bip == NULL); 107 BUG_ON(bip == NULL);
108 108
109 /* A cloned bio doesn't own the integrity metadata */ 109 /* A cloned bio doesn't own the integrity metadata */
110 if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) 110 if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
111 && bip->bip_buf != NULL)
111 kfree(bip->bip_buf); 112 kfree(bip->bip_buf);
112 113
113 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); 114 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
150} 151}
151EXPORT_SYMBOL(bio_integrity_add_page); 152EXPORT_SYMBOL(bio_integrity_add_page);
152 153
154static int bdev_integrity_enabled(struct block_device *bdev, int rw)
155{
156 struct blk_integrity *bi = bdev_get_integrity(bdev);
157
158 if (bi == NULL)
159 return 0;
160
161 if (rw == READ && bi->verify_fn != NULL &&
162 (bi->flags & INTEGRITY_FLAG_READ))
163 return 1;
164
165 if (rw == WRITE && bi->generate_fn != NULL &&
166 (bi->flags & INTEGRITY_FLAG_WRITE))
167 return 1;
168
169 return 0;
170}
171
153/** 172/**
154 * bio_integrity_enabled - Check whether integrity can be passed 173 * bio_integrity_enabled - Check whether integrity can be passed
155 * @bio: bio to check 174 * @bio: bio to check
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio)
313 } 332 }
314} 333}
315 334
335static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
336{
337 if (bi)
338 return bi->tuple_size;
339
340 return 0;
341}
342
316/** 343/**
317 * bio_integrity_prep - Prepare bio for integrity I/O 344 * bio_integrity_prep - Prepare bio for integrity I/O
318 * @bio: bio to prepare 345 * @bio: bio to prepare
diff --git a/fs/bio.c b/fs/bio.c
index 3cba7ae34d75..77a55bcceedb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
30 30
31static struct kmem_cache *bio_slab __read_mostly; 31static struct kmem_cache *bio_slab __read_mostly;
32 32
33mempool_t *bio_split_pool __read_mostly; 33static mempool_t *bio_split_pool __read_mostly;
34 34
35/* 35/*
36 * if you change this list, also change bvec_alloc or things will 36 * if you change this list, also change bvec_alloc or things will
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
60 struct bio_vec *bvl; 60 struct bio_vec *bvl;
61 61
62 /* 62 /*
63 * see comment near bvec_array define! 63 * If 'bs' is given, lookup the pool and do the mempool alloc.
64 * If not, this is a bio_kmalloc() allocation and just do a
65 * kzalloc() for the exact number of vecs right away.
64 */ 66 */
65 switch (nr) { 67 if (bs) {
66 case 1 : *idx = 0; break; 68 /*
67 case 2 ... 4: *idx = 1; break; 69 * see comment near bvec_array define!
68 case 5 ... 16: *idx = 2; break; 70 */
69 case 17 ... 64: *idx = 3; break; 71 switch (nr) {
70 case 65 ... 128: *idx = 4; break; 72 case 1:
71 case 129 ... BIO_MAX_PAGES: *idx = 5; break; 73 *idx = 0;
74 break;
75 case 2 ... 4:
76 *idx = 1;
77 break;
78 case 5 ... 16:
79 *idx = 2;
80 break;
81 case 17 ... 64:
82 *idx = 3;
83 break;
84 case 65 ... 128:
85 *idx = 4;
86 break;
87 case 129 ... BIO_MAX_PAGES:
88 *idx = 5;
89 break;
72 default: 90 default:
73 return NULL; 91 return NULL;
74 } 92 }
75 /*
76 * idx now points to the pool we want to allocate from
77 */
78 93
79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 94 /*
80 if (bvl) 95 * idx now points to the pool we want to allocate from
81 memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); 96 */
97 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
98 if (bvl)
99 memset(bvl, 0,
100 bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
101 } else
102 bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
82 103
83 return bvl; 104 return bvl;
84} 105}
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio)
107 bio_free(bio, fs_bio_set); 128 bio_free(bio, fs_bio_set);
108} 129}
109 130
131static void bio_kmalloc_destructor(struct bio *bio)
132{
133 kfree(bio->bi_io_vec);
134 kfree(bio);
135}
136
110void bio_init(struct bio *bio) 137void bio_init(struct bio *bio)
111{ 138{
112 memset(bio, 0, sizeof(*bio)); 139 memset(bio, 0, sizeof(*bio));
113 bio->bi_flags = 1 << BIO_UPTODATE; 140 bio->bi_flags = 1 << BIO_UPTODATE;
141 bio->bi_comp_cpu = -1;
114 atomic_set(&bio->bi_cnt, 1); 142 atomic_set(&bio->bi_cnt, 1);
115} 143}
116 144
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio)
118 * bio_alloc_bioset - allocate a bio for I/O 146 * bio_alloc_bioset - allocate a bio for I/O
119 * @gfp_mask: the GFP_ mask given to the slab allocator 147 * @gfp_mask: the GFP_ mask given to the slab allocator
120 * @nr_iovecs: number of iovecs to pre-allocate 148 * @nr_iovecs: number of iovecs to pre-allocate
121 * @bs: the bio_set to allocate from 149 * @bs: the bio_set to allocate from. If %NULL, just use kmalloc
122 * 150 *
123 * Description: 151 * Description:
124 * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. 152 * bio_alloc_bioset will first try its own mempool to satisfy the allocation.
125 * If %__GFP_WAIT is set then we will block on the internal pool waiting 153 * If %__GFP_WAIT is set then we will block on the internal pool waiting
126 * for a &struct bio to become free. 154 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
155 * fall back to just using @kmalloc to allocate the required memory.
127 * 156 *
128 * allocate bio and iovecs from the memory pools specified by the 157 * allocate bio and iovecs from the memory pools specified by the
129 * bio_set structure. 158 * bio_set structure, or @kmalloc if none given.
130 **/ 159 **/
131struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 160struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
132{ 161{
133 struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); 162 struct bio *bio;
163
164 if (bs)
165 bio = mempool_alloc(bs->bio_pool, gfp_mask);
166 else
167 bio = kmalloc(sizeof(*bio), gfp_mask);
134 168
135 if (likely(bio)) { 169 if (likely(bio)) {
136 struct bio_vec *bvl = NULL; 170 struct bio_vec *bvl = NULL;
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
141 175
142 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 176 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
143 if (unlikely(!bvl)) { 177 if (unlikely(!bvl)) {
144 mempool_free(bio, bs->bio_pool); 178 if (bs)
179 mempool_free(bio, bs->bio_pool);
180 else
181 kfree(bio);
145 bio = NULL; 182 bio = NULL;
146 goto out; 183 goto out;
147 } 184 }
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
164 return bio; 201 return bio;
165} 202}
166 203
204/*
205 * Like bio_alloc(), but doesn't use a mempool backing. This means that
206 * it CAN fail, but while bio_alloc() can only be used for allocations
207 * that have a short (finite) life span, bio_kmalloc() should be used
208 * for more permanent bio allocations (like allocating some bio's for
209 * initalization or setup purposes).
210 */
211struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
212{
213 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
214
215 if (bio)
216 bio->bi_destructor = bio_kmalloc_destructor;
217
218 return bio;
219}
220
167void zero_fill_bio(struct bio *bio) 221void zero_fill_bio(struct bio *bio)
168{ 222{
169 unsigned long flags; 223 unsigned long flags;
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
208 return bio->bi_phys_segments; 262 return bio->bi_phys_segments;
209} 263}
210 264
211inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
212{
213 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
214 blk_recount_segments(q, bio);
215
216 return bio->bi_hw_segments;
217}
218
219/** 265/**
220 * __bio_clone - clone a bio 266 * __bio_clone - clone a bio
221 * @bio: destination bio 267 * @bio: destination bio
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
350 */ 396 */
351 397
352 while (bio->bi_phys_segments >= q->max_phys_segments 398 while (bio->bi_phys_segments >= q->max_phys_segments
353 || bio->bi_hw_segments >= q->max_hw_segments 399 || bio->bi_phys_segments >= q->max_hw_segments) {
354 || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
355 400
356 if (retried_segments) 401 if (retried_segments)
357 return 0; 402 return 0;
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
395 } 440 }
396 441
397 /* If we may be able to merge these biovecs, force a recount */ 442 /* If we may be able to merge these biovecs, force a recount */
398 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || 443 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
399 BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
400 bio->bi_flags &= ~(1 << BIO_SEG_VALID); 444 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
401 445
402 bio->bi_vcnt++; 446 bio->bi_vcnt++;
403 bio->bi_phys_segments++; 447 bio->bi_phys_segments++;
404 bio->bi_hw_segments++;
405 done: 448 done:
406 bio->bi_size += len; 449 bio->bi_size += len;
407 return len; 450 return len;
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
449 492
450struct bio_map_data { 493struct bio_map_data {
451 struct bio_vec *iovecs; 494 struct bio_vec *iovecs;
452 int nr_sgvecs;
453 struct sg_iovec *sgvecs; 495 struct sg_iovec *sgvecs;
496 int nr_sgvecs;
497 int is_our_pages;
454}; 498};
455 499
456static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, 500static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
457 struct sg_iovec *iov, int iov_count) 501 struct sg_iovec *iov, int iov_count,
502 int is_our_pages)
458{ 503{
459 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); 504 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
460 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); 505 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
461 bmd->nr_sgvecs = iov_count; 506 bmd->nr_sgvecs = iov_count;
507 bmd->is_our_pages = is_our_pages;
462 bio->bi_private = bmd; 508 bio->bi_private = bmd;
463} 509}
464 510
@@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
493} 539}
494 540
495static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, 541static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
496 struct sg_iovec *iov, int iov_count, int uncopy) 542 struct sg_iovec *iov, int iov_count, int uncopy,
543 int do_free_page)
497{ 544{
498 int ret = 0, i; 545 int ret = 0, i;
499 struct bio_vec *bvec; 546 struct bio_vec *bvec;
@@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
536 } 583 }
537 } 584 }
538 585
539 if (uncopy) 586 if (do_free_page)
540 __free_page(bvec->bv_page); 587 __free_page(bvec->bv_page);
541 } 588 }
542 589
@@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
553int bio_uncopy_user(struct bio *bio) 600int bio_uncopy_user(struct bio *bio)
554{ 601{
555 struct bio_map_data *bmd = bio->bi_private; 602 struct bio_map_data *bmd = bio->bi_private;
556 int ret; 603 int ret = 0;
557
558 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
559 604
605 if (!bio_flagged(bio, BIO_NULL_MAPPED))
606 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
607 bmd->nr_sgvecs, 1, bmd->is_our_pages);
560 bio_free_map_data(bmd); 608 bio_free_map_data(bmd);
561 bio_put(bio); 609 bio_put(bio);
562 return ret; 610 return ret;
@@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio)
565/** 613/**
566 * bio_copy_user_iov - copy user data to bio 614 * bio_copy_user_iov - copy user data to bio
567 * @q: destination block queue 615 * @q: destination block queue
616 * @map_data: pointer to the rq_map_data holding pages (if necessary)
568 * @iov: the iovec. 617 * @iov: the iovec.
569 * @iov_count: number of elements in the iovec 618 * @iov_count: number of elements in the iovec
570 * @write_to_vm: bool indicating writing to pages or not 619 * @write_to_vm: bool indicating writing to pages or not
620 * @gfp_mask: memory allocation flags
571 * 621 *
572 * Prepares and returns a bio for indirect user io, bouncing data 622 * Prepares and returns a bio for indirect user io, bouncing data
573 * to/from kernel pages as necessary. Must be paired with 623 * to/from kernel pages as necessary. Must be paired with
574 * call bio_uncopy_user() on io completion. 624 * call bio_uncopy_user() on io completion.
575 */ 625 */
576struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, 626struct bio *bio_copy_user_iov(struct request_queue *q,
577 int iov_count, int write_to_vm) 627 struct rq_map_data *map_data,
628 struct sg_iovec *iov, int iov_count,
629 int write_to_vm, gfp_t gfp_mask)
578{ 630{
579 struct bio_map_data *bmd; 631 struct bio_map_data *bmd;
580 struct bio_vec *bvec; 632 struct bio_vec *bvec;
@@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
597 len += iov[i].iov_len; 649 len += iov[i].iov_len;
598 } 650 }
599 651
600 bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); 652 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
601 if (!bmd) 653 if (!bmd)
602 return ERR_PTR(-ENOMEM); 654 return ERR_PTR(-ENOMEM);
603 655
604 ret = -ENOMEM; 656 ret = -ENOMEM;
605 bio = bio_alloc(GFP_KERNEL, nr_pages); 657 bio = bio_alloc(gfp_mask, nr_pages);
606 if (!bio) 658 if (!bio)
607 goto out_bmd; 659 goto out_bmd;
608 660
609 bio->bi_rw |= (!write_to_vm << BIO_RW); 661 bio->bi_rw |= (!write_to_vm << BIO_RW);
610 662
611 ret = 0; 663 ret = 0;
664 i = 0;
612 while (len) { 665 while (len) {
613 unsigned int bytes = PAGE_SIZE; 666 unsigned int bytes;
667
668 if (map_data)
669 bytes = 1U << (PAGE_SHIFT + map_data->page_order);
670 else
671 bytes = PAGE_SIZE;
614 672
615 if (bytes > len) 673 if (bytes > len)
616 bytes = len; 674 bytes = len;
617 675
618 page = alloc_page(q->bounce_gfp | GFP_KERNEL); 676 if (map_data) {
677 if (i == map_data->nr_entries) {
678 ret = -ENOMEM;
679 break;
680 }
681 page = map_data->pages[i++];
682 } else
683 page = alloc_page(q->bounce_gfp | gfp_mask);
619 if (!page) { 684 if (!page) {
620 ret = -ENOMEM; 685 ret = -ENOMEM;
621 break; 686 break;
@@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
634 * success 699 * success
635 */ 700 */
636 if (!write_to_vm) { 701 if (!write_to_vm) {
637 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); 702 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
638 if (ret) 703 if (ret)
639 goto cleanup; 704 goto cleanup;
640 } 705 }
641 706
642 bio_set_map_data(bmd, bio, iov, iov_count); 707 bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
643 return bio; 708 return bio;
644cleanup: 709cleanup:
645 bio_for_each_segment(bvec, bio, i) 710 if (!map_data)
646 __free_page(bvec->bv_page); 711 bio_for_each_segment(bvec, bio, i)
712 __free_page(bvec->bv_page);
647 713
648 bio_put(bio); 714 bio_put(bio);
649out_bmd: 715out_bmd:
@@ -654,29 +720,32 @@ out_bmd:
654/** 720/**
655 * bio_copy_user - copy user data to bio 721 * bio_copy_user - copy user data to bio
656 * @q: destination block queue 722 * @q: destination block queue
723 * @map_data: pointer to the rq_map_data holding pages (if necessary)
657 * @uaddr: start of user address 724 * @uaddr: start of user address
658 * @len: length in bytes 725 * @len: length in bytes
659 * @write_to_vm: bool indicating writing to pages or not 726 * @write_to_vm: bool indicating writing to pages or not
727 * @gfp_mask: memory allocation flags
660 * 728 *
661 * Prepares and returns a bio for indirect user io, bouncing data 729 * Prepares and returns a bio for indirect user io, bouncing data
662 * to/from kernel pages as necessary. Must be paired with 730 * to/from kernel pages as necessary. Must be paired with
663 * call bio_uncopy_user() on io completion. 731 * call bio_uncopy_user() on io completion.
664 */ 732 */
665struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, 733struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
666 unsigned int len, int write_to_vm) 734 unsigned long uaddr, unsigned int len,
735 int write_to_vm, gfp_t gfp_mask)
667{ 736{
668 struct sg_iovec iov; 737 struct sg_iovec iov;
669 738
670 iov.iov_base = (void __user *)uaddr; 739 iov.iov_base = (void __user *)uaddr;
671 iov.iov_len = len; 740 iov.iov_len = len;
672 741
673 return bio_copy_user_iov(q, &iov, 1, write_to_vm); 742 return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
674} 743}
675 744
676static struct bio *__bio_map_user_iov(struct request_queue *q, 745static struct bio *__bio_map_user_iov(struct request_queue *q,
677 struct block_device *bdev, 746 struct block_device *bdev,
678 struct sg_iovec *iov, int iov_count, 747 struct sg_iovec *iov, int iov_count,
679 int write_to_vm) 748 int write_to_vm, gfp_t gfp_mask)
680{ 749{
681 int i, j; 750 int i, j;
682 int nr_pages = 0; 751 int nr_pages = 0;
@@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
702 if (!nr_pages) 771 if (!nr_pages)
703 return ERR_PTR(-EINVAL); 772 return ERR_PTR(-EINVAL);
704 773
705 bio = bio_alloc(GFP_KERNEL, nr_pages); 774 bio = bio_alloc(gfp_mask, nr_pages);
706 if (!bio) 775 if (!bio)
707 return ERR_PTR(-ENOMEM); 776 return ERR_PTR(-ENOMEM);
708 777
709 ret = -ENOMEM; 778 ret = -ENOMEM;
710 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 779 pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
711 if (!pages) 780 if (!pages)
712 goto out; 781 goto out;
713 782
@@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
786 * @uaddr: start of user address 855 * @uaddr: start of user address
787 * @len: length in bytes 856 * @len: length in bytes
788 * @write_to_vm: bool indicating writing to pages or not 857 * @write_to_vm: bool indicating writing to pages or not
858 * @gfp_mask: memory allocation flags
789 * 859 *
790 * Map the user space address into a bio suitable for io to a block 860 * Map the user space address into a bio suitable for io to a block
791 * device. Returns an error pointer in case of error. 861 * device. Returns an error pointer in case of error.
792 */ 862 */
793struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, 863struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
794 unsigned long uaddr, unsigned int len, int write_to_vm) 864 unsigned long uaddr, unsigned int len, int write_to_vm,
865 gfp_t gfp_mask)
795{ 866{
796 struct sg_iovec iov; 867 struct sg_iovec iov;
797 868
798 iov.iov_base = (void __user *)uaddr; 869 iov.iov_base = (void __user *)uaddr;
799 iov.iov_len = len; 870 iov.iov_len = len;
800 871
801 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); 872 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
802} 873}
803 874
804/** 875/**
@@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
808 * @iov: the iovec. 879 * @iov: the iovec.
809 * @iov_count: number of elements in the iovec 880 * @iov_count: number of elements in the iovec
810 * @write_to_vm: bool indicating writing to pages or not 881 * @write_to_vm: bool indicating writing to pages or not
882 * @gfp_mask: memory allocation flags
811 * 883 *
812 * Map the user space address into a bio suitable for io to a block 884 * Map the user space address into a bio suitable for io to a block
813 * device. Returns an error pointer in case of error. 885 * device. Returns an error pointer in case of error.
814 */ 886 */
815struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, 887struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
816 struct sg_iovec *iov, int iov_count, 888 struct sg_iovec *iov, int iov_count,
817 int write_to_vm) 889 int write_to_vm, gfp_t gfp_mask)
818{ 890{
819 struct bio *bio; 891 struct bio *bio;
820 892
821 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); 893 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
822 894 gfp_mask);
823 if (IS_ERR(bio)) 895 if (IS_ERR(bio))
824 return bio; 896 return bio;
825 897
@@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
976struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, 1048struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
977 gfp_t gfp_mask, int reading) 1049 gfp_t gfp_mask, int reading)
978{ 1050{
979 unsigned long kaddr = (unsigned long)data;
980 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
981 unsigned long start = kaddr >> PAGE_SHIFT;
982 const int nr_pages = end - start;
983 struct bio *bio; 1051 struct bio *bio;
984 struct bio_vec *bvec; 1052 struct bio_vec *bvec;
985 struct bio_map_data *bmd; 1053 int i;
986 int i, ret;
987 struct sg_iovec iov;
988
989 iov.iov_base = data;
990 iov.iov_len = len;
991
992 bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
993 if (!bmd)
994 return ERR_PTR(-ENOMEM);
995
996 ret = -ENOMEM;
997 bio = bio_alloc(gfp_mask, nr_pages);
998 if (!bio)
999 goto out_bmd;
1000
1001 while (len) {
1002 struct page *page;
1003 unsigned int bytes = PAGE_SIZE;
1004
1005 if (bytes > len)
1006 bytes = len;
1007
1008 page = alloc_page(q->bounce_gfp | gfp_mask);
1009 if (!page) {
1010 ret = -ENOMEM;
1011 goto cleanup;
1012 }
1013
1014 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
1015 ret = -EINVAL;
1016 goto cleanup;
1017 }
1018 1054
1019 len -= bytes; 1055 bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
1020 } 1056 if (IS_ERR(bio))
1057 return bio;
1021 1058
1022 if (!reading) { 1059 if (!reading) {
1023 void *p = data; 1060 void *p = data;
@@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1030 } 1067 }
1031 } 1068 }
1032 1069
1033 bio->bi_private = bmd;
1034 bio->bi_end_io = bio_copy_kern_endio; 1070 bio->bi_end_io = bio_copy_kern_endio;
1035 1071
1036 bio_set_map_data(bmd, bio, &iov, 1);
1037 return bio; 1072 return bio;
1038cleanup:
1039 bio_for_each_segment(bvec, bio, i)
1040 __free_page(bvec->bv_page);
1041
1042 bio_put(bio);
1043out_bmd:
1044 bio_free_map_data(bmd);
1045
1046 return ERR_PTR(ret);
1047} 1073}
1048 1074
1049/* 1075/*
@@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err)
1230 * split a bio - only worry about a bio with a single page 1256 * split a bio - only worry about a bio with a single page
1231 * in it's iovec 1257 * in it's iovec
1232 */ 1258 */
1233struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) 1259struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1234{ 1260{
1235 struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); 1261 struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
1236 1262
1237 if (!bp) 1263 if (!bp)
1238 return bp; 1264 return bp;
@@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1266 bp->bio2.bi_end_io = bio_pair_end_2; 1292 bp->bio2.bi_end_io = bio_pair_end_2;
1267 1293
1268 bp->bio1.bi_private = bi; 1294 bp->bio1.bi_private = bi;
1269 bp->bio2.bi_private = pool; 1295 bp->bio2.bi_private = bio_split_pool;
1270 1296
1271 if (bio_integrity(bi)) 1297 if (bio_integrity(bi))
1272 bio_integrity_split(bi, bp, first_sectors); 1298 bio_integrity_split(bi, bp, first_sectors);
@@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1274 return bp; 1300 return bp;
1275} 1301}
1276 1302
1303/**
1304 * bio_sector_offset - Find hardware sector offset in bio
1305 * @bio: bio to inspect
1306 * @index: bio_vec index
1307 * @offset: offset in bv_page
1308 *
1309 * Return the number of hardware sectors between beginning of bio
1310 * and an end point indicated by a bio_vec index and an offset
1311 * within that vector's page.
1312 */
1313sector_t bio_sector_offset(struct bio *bio, unsigned short index,
1314 unsigned int offset)
1315{
1316 unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
1317 struct bio_vec *bv;
1318 sector_t sectors;
1319 int i;
1320
1321 sectors = 0;
1322
1323 if (index >= bio->bi_idx)
1324 index = bio->bi_vcnt - 1;
1325
1326 __bio_for_each_segment(bv, bio, i, 0) {
1327 if (i == index) {
1328 if (offset > bv->bv_offset)
1329 sectors += (offset - bv->bv_offset) / sector_sz;
1330 break;
1331 }
1332
1333 sectors += bv->bv_len / sector_sz;
1334 }
1335
1336 return sectors;
1337}
1338EXPORT_SYMBOL(bio_sector_offset);
1277 1339
1278/* 1340/*
1279 * create memory pools for biovec's in a bio_set. 1341 * create memory pools for biovec's in a bio_set.
@@ -1376,6 +1438,7 @@ static int __init init_bio(void)
1376subsys_initcall(init_bio); 1438subsys_initcall(init_bio);
1377 1439
1378EXPORT_SYMBOL(bio_alloc); 1440EXPORT_SYMBOL(bio_alloc);
1441EXPORT_SYMBOL(bio_kmalloc);
1379EXPORT_SYMBOL(bio_put); 1442EXPORT_SYMBOL(bio_put);
1380EXPORT_SYMBOL(bio_free); 1443EXPORT_SYMBOL(bio_free);
1381EXPORT_SYMBOL(bio_endio); 1444EXPORT_SYMBOL(bio_endio);
@@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init);
1383EXPORT_SYMBOL(__bio_clone); 1446EXPORT_SYMBOL(__bio_clone);
1384EXPORT_SYMBOL(bio_clone); 1447EXPORT_SYMBOL(bio_clone);
1385EXPORT_SYMBOL(bio_phys_segments); 1448EXPORT_SYMBOL(bio_phys_segments);
1386EXPORT_SYMBOL(bio_hw_segments);
1387EXPORT_SYMBOL(bio_add_page); 1449EXPORT_SYMBOL(bio_add_page);
1388EXPORT_SYMBOL(bio_add_pc_page); 1450EXPORT_SYMBOL(bio_add_pc_page);
1389EXPORT_SYMBOL(bio_get_nr_vecs); 1451EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern);
1393EXPORT_SYMBOL(bio_copy_kern); 1455EXPORT_SYMBOL(bio_copy_kern);
1394EXPORT_SYMBOL(bio_pair_release); 1456EXPORT_SYMBOL(bio_pair_release);
1395EXPORT_SYMBOL(bio_split); 1457EXPORT_SYMBOL(bio_split);
1396EXPORT_SYMBOL(bio_split_pool);
1397EXPORT_SYMBOL(bio_copy_user); 1458EXPORT_SYMBOL(bio_copy_user);
1398EXPORT_SYMBOL(bio_uncopy_user); 1459EXPORT_SYMBOL(bio_uncopy_user);
1399EXPORT_SYMBOL(bioset_create); 1460EXPORT_SYMBOL(bioset_create);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aff54219e049..d84f0469a016 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release);
540 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 540 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
541 */ 541 */
542 542
543static struct kobject *bdev_get_kobj(struct block_device *bdev)
544{
545 if (bdev->bd_contains != bdev)
546 return kobject_get(&bdev->bd_part->dev.kobj);
547 else
548 return kobject_get(&bdev->bd_disk->dev.kobj);
549}
550
551static struct kobject *bdev_get_holder(struct block_device *bdev)
552{
553 if (bdev->bd_contains != bdev)
554 return kobject_get(bdev->bd_part->holder_dir);
555 else
556 return kobject_get(bdev->bd_disk->holder_dir);
557}
558
559static int add_symlink(struct kobject *from, struct kobject *to) 543static int add_symlink(struct kobject *from, struct kobject *to)
560{ 544{
561 if (!from || !to) 545 if (!from || !to)
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev,
604 if (!bo->hdev) 588 if (!bo->hdev)
605 goto fail_put_sdir; 589 goto fail_put_sdir;
606 590
607 bo->sdev = bdev_get_kobj(bdev); 591 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
608 if (!bo->sdev) 592 if (!bo->sdev)
609 goto fail_put_hdev; 593 goto fail_put_hdev;
610 594
611 bo->hdir = bdev_get_holder(bdev); 595 bo->hdir = kobject_get(bdev->bd_part->holder_dir);
612 if (!bo->hdir) 596 if (!bo->hdir)
613 goto fail_put_sdev; 597 goto fail_put_sdev;
614 598
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
868 852
869EXPORT_SYMBOL(open_by_devnum); 853EXPORT_SYMBOL(open_by_devnum);
870 854
855/**
856 * flush_disk - invalidates all buffer-cache entries on a disk
857 *
858 * @bdev: struct block device to be flushed
859 *
860 * Invalidates all buffer-cache entries on a disk. It should be called
861 * when a disk has been changed -- either by a media change or online
862 * resize.
863 */
864static void flush_disk(struct block_device *bdev)
865{
866 if (__invalidate_device(bdev)) {
867 char name[BDEVNAME_SIZE] = "";
868
869 if (bdev->bd_disk)
870 disk_name(bdev->bd_disk, 0, name);
871 printk(KERN_WARNING "VFS: busy inodes on changed media or "
872 "resized disk %s\n", name);
873 }
874
875 if (!bdev->bd_disk)
876 return;
877 if (disk_partitionable(bdev->bd_disk))
878 bdev->bd_invalidated = 1;
879}
880
881/**
882 * check_disk_size_change - checks for disk size change and adjusts bdev size.
883 * @disk: struct gendisk to check
884 * @bdev: struct bdev to adjust.
885 *
886 * This routine checks to see if the bdev size does not match the disk size
887 * and adjusts it if it differs.
888 */
889void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
890{
891 loff_t disk_size, bdev_size;
892
893 disk_size = (loff_t)get_capacity(disk) << 9;
894 bdev_size = i_size_read(bdev->bd_inode);
895 if (disk_size != bdev_size) {
896 char name[BDEVNAME_SIZE];
897
898 disk_name(disk, 0, name);
899 printk(KERN_INFO
900 "%s: detected capacity change from %lld to %lld\n",
901 name, bdev_size, disk_size);
902 i_size_write(bdev->bd_inode, disk_size);
903 flush_disk(bdev);
904 }
905}
906EXPORT_SYMBOL(check_disk_size_change);
907
908/**
909 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
910 * @disk: struct gendisk to be revalidated
911 *
912 * This routine is a wrapper for lower-level driver's revalidate_disk
913 * call-backs. It is used to do common pre and post operations needed
914 * for all revalidate_disk operations.
915 */
916int revalidate_disk(struct gendisk *disk)
917{
918 struct block_device *bdev;
919 int ret = 0;
920
921 if (disk->fops->revalidate_disk)
922 ret = disk->fops->revalidate_disk(disk);
923
924 bdev = bdget_disk(disk, 0);
925 if (!bdev)
926 return ret;
927
928 mutex_lock(&bdev->bd_mutex);
929 check_disk_size_change(disk, bdev);
930 mutex_unlock(&bdev->bd_mutex);
931 bdput(bdev);
932 return ret;
933}
934EXPORT_SYMBOL(revalidate_disk);
935
871/* 936/*
872 * This routine checks whether a removable media has been changed, 937 * This routine checks whether a removable media has been changed,
873 * and invalidates all buffer-cache-entries in that case. This 938 * and invalidates all buffer-cache-entries in that case. This
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev)
887 if (!bdops->media_changed(bdev->bd_disk)) 952 if (!bdops->media_changed(bdev->bd_disk))
888 return 0; 953 return 0;
889 954
890 if (__invalidate_device(bdev)) 955 flush_disk(bdev);
891 printk("VFS: busy inodes on changed media.\n");
892
893 if (bdops->revalidate_disk) 956 if (bdops->revalidate_disk)
894 bdops->revalidate_disk(bdev->bd_disk); 957 bdops->revalidate_disk(bdev->bd_disk);
895 if (bdev->bd_disk->minors > 1)
896 bdev->bd_invalidated = 1;
897 return 1; 958 return 1;
898} 959}
899 960
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part);
927 988
928static int do_open(struct block_device *bdev, struct file *file, int for_part) 989static int do_open(struct block_device *bdev, struct file *file, int for_part)
929{ 990{
930 struct module *owner = NULL;
931 struct gendisk *disk; 991 struct gendisk *disk;
992 struct hd_struct *part = NULL;
932 int ret; 993 int ret;
933 int part; 994 int partno;
934 int perm = 0; 995 int perm = 0;
935 996
936 if (file->f_mode & FMODE_READ) 997 if (file->f_mode & FMODE_READ)
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
948 1009
949 ret = -ENXIO; 1010 ret = -ENXIO;
950 file->f_mapping = bdev->bd_inode->i_mapping; 1011 file->f_mapping = bdev->bd_inode->i_mapping;
1012
951 lock_kernel(); 1013 lock_kernel();
952 disk = get_gendisk(bdev->bd_dev, &part); 1014
953 if (!disk) { 1015 disk = get_gendisk(bdev->bd_dev, &partno);
954 unlock_kernel(); 1016 if (!disk)
955 bdput(bdev); 1017 goto out_unlock_kernel;
956 return ret; 1018 part = disk_get_part(disk, partno);
957 } 1019 if (!part)
958 owner = disk->fops->owner; 1020 goto out_unlock_kernel;
959 1021
960 mutex_lock_nested(&bdev->bd_mutex, for_part); 1022 mutex_lock_nested(&bdev->bd_mutex, for_part);
961 if (!bdev->bd_openers) { 1023 if (!bdev->bd_openers) {
962 bdev->bd_disk = disk; 1024 bdev->bd_disk = disk;
1025 bdev->bd_part = part;
963 bdev->bd_contains = bdev; 1026 bdev->bd_contains = bdev;
964 if (!part) { 1027 if (!partno) {
965 struct backing_dev_info *bdi; 1028 struct backing_dev_info *bdi;
966 if (disk->fops->open) { 1029 if (disk->fops->open) {
967 ret = disk->fops->open(bdev->bd_inode, file); 1030 ret = disk->fops->open(bdev->bd_inode, file);
968 if (ret) 1031 if (ret)
969 goto out_first; 1032 goto out_clear;
970 } 1033 }
971 if (!bdev->bd_openers) { 1034 if (!bdev->bd_openers) {
972 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1035 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
978 if (bdev->bd_invalidated) 1041 if (bdev->bd_invalidated)
979 rescan_partitions(disk, bdev); 1042 rescan_partitions(disk, bdev);
980 } else { 1043 } else {
981 struct hd_struct *p;
982 struct block_device *whole; 1044 struct block_device *whole;
983 whole = bdget_disk(disk, 0); 1045 whole = bdget_disk(disk, 0);
984 ret = -ENOMEM; 1046 ret = -ENOMEM;
985 if (!whole) 1047 if (!whole)
986 goto out_first; 1048 goto out_clear;
987 BUG_ON(for_part); 1049 BUG_ON(for_part);
988 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); 1050 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
989 if (ret) 1051 if (ret)
990 goto out_first; 1052 goto out_clear;
991 bdev->bd_contains = whole; 1053 bdev->bd_contains = whole;
992 p = disk->part[part - 1];
993 bdev->bd_inode->i_data.backing_dev_info = 1054 bdev->bd_inode->i_data.backing_dev_info =
994 whole->bd_inode->i_data.backing_dev_info; 1055 whole->bd_inode->i_data.backing_dev_info;
995 if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { 1056 if (!(disk->flags & GENHD_FL_UP) ||
1057 !part || !part->nr_sects) {
996 ret = -ENXIO; 1058 ret = -ENXIO;
997 goto out_first; 1059 goto out_clear;
998 } 1060 }
999 kobject_get(&p->dev.kobj); 1061 bd_set_size(bdev, (loff_t)part->nr_sects << 9);
1000 bdev->bd_part = p;
1001 bd_set_size(bdev, (loff_t) p->nr_sects << 9);
1002 } 1062 }
1003 } else { 1063 } else {
1064 disk_put_part(part);
1004 put_disk(disk); 1065 put_disk(disk);
1005 module_put(owner); 1066 module_put(disk->fops->owner);
1067 part = NULL;
1068 disk = NULL;
1006 if (bdev->bd_contains == bdev) { 1069 if (bdev->bd_contains == bdev) {
1007 if (bdev->bd_disk->fops->open) { 1070 if (bdev->bd_disk->fops->open) {
1008 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); 1071 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
1009 if (ret) 1072 if (ret)
1010 goto out; 1073 goto out_unlock_bdev;
1011 } 1074 }
1012 if (bdev->bd_invalidated) 1075 if (bdev->bd_invalidated)
1013 rescan_partitions(bdev->bd_disk, bdev); 1076 rescan_partitions(bdev->bd_disk, bdev);
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1020 unlock_kernel(); 1083 unlock_kernel();
1021 return 0; 1084 return 0;
1022 1085
1023out_first: 1086 out_clear:
1024 bdev->bd_disk = NULL; 1087 bdev->bd_disk = NULL;
1088 bdev->bd_part = NULL;
1025 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1089 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1026 if (bdev != bdev->bd_contains) 1090 if (bdev != bdev->bd_contains)
1027 __blkdev_put(bdev->bd_contains, 1); 1091 __blkdev_put(bdev->bd_contains, 1);
1028 bdev->bd_contains = NULL; 1092 bdev->bd_contains = NULL;
1029 put_disk(disk); 1093 out_unlock_bdev:
1030 module_put(owner);
1031out:
1032 mutex_unlock(&bdev->bd_mutex); 1094 mutex_unlock(&bdev->bd_mutex);
1095 out_unlock_kernel:
1033 unlock_kernel(); 1096 unlock_kernel();
1034 if (ret) 1097
1035 bdput(bdev); 1098 disk_put_part(part);
1099 if (disk)
1100 module_put(disk->fops->owner);
1101 put_disk(disk);
1102 bdput(bdev);
1103
1036 return ret; 1104 return ret;
1037} 1105}
1038 1106
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
1117 1185
1118 put_disk(disk); 1186 put_disk(disk);
1119 module_put(owner); 1187 module_put(owner);
1120 1188 disk_put_part(bdev->bd_part);
1121 if (bdev->bd_contains != bdev) { 1189 bdev->bd_part = NULL;
1122 kobject_put(&bdev->bd_part->dev.kobj);
1123 bdev->bd_part = NULL;
1124 }
1125 bdev->bd_disk = NULL; 1190 bdev->bd_disk = NULL;
1126 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1191 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1127 if (bdev != bdev->bd_contains) 1192 if (bdev != bdev->bd_contains)
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev);
1197 1262
1198/** 1263/**
1199 * lookup_bdev - lookup a struct block_device by name 1264 * lookup_bdev - lookup a struct block_device by name
1265 * @pathname: special file representing the block device
1200 * 1266 *
1201 * @path: special file representing the block device 1267 * Get a reference to the blockdevice at @pathname in the current
1202 *
1203 * Get a reference to the blockdevice at @path in the current
1204 * namespace if possible and return it. Return ERR_PTR(error) 1268 * namespace if possible and return it. Return ERR_PTR(error)
1205 * otherwise. 1269 * otherwise.
1206 */ 1270 */
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f9e4ad97a79e..06e521a945c3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -9,7 +9,10 @@ files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit
9on parent directory when server supports Unix Extensions but not POSIX 9on parent directory when server supports Unix Extensions but not POSIX
10create. Update cifs.upcall version to handle new Kerberos sec flags 10create. Update cifs.upcall version to handle new Kerberos sec flags
11(this requires update of cifs.upcall program from Samba). Fix memory leak 11(this requires update of cifs.upcall program from Samba). Fix memory leak
12on dns_upcall (resolving DFS referralls). 12on dns_upcall (resolving DFS referralls). Fix plain text password
13authentication (requires setting SecurityFlags to 0x30030 to enable
14lanman and plain text though). Fix writes to be at correct offset when
15file is open with O_APPEND and file is on a directio (forcediretio) mount.
13 16
14Version 1.53 17Version 1.53
15------------ 18------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 68b5c1169d9d..bd2343d4c6a6 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and
542 hashing mechanisms (as "must use") on the other hand 542 hashing mechanisms (as "must use") on the other hand
543 does not make much sense. Default flags are 543 does not make much sense. Default flags are
544 0x07007 544 0x07007
545 (NTLM, NTLMv2 and packet signing allowed). Maximum 545 (NTLM, NTLMv2 and packet signing allowed). The maximum
546 allowable flags if you want to allow mounts to servers 546 allowable flags if you want to allow mounts to servers
547 using weaker password hashes is 0x37037 (lanman, 547 using weaker password hashes is 0x37037 (lanman,
548 plaintext, ntlm, ntlmv2, signing allowed): 548 plaintext, ntlm, ntlmv2, signing allowed). Some
549 SecurityFlags require the corresponding menuconfig
550 options to be enabled (lanman and plaintext require
551 CONFIG_CIFS_WEAK_PW_HASH for example). Enabling
552 plaintext authentication currently requires also
553 enabling lanman authentication in the security flags
554 because the cifs module only supports sending
555 laintext passwords using the older lanman dialect
556 form of the session setup SMB. (e.g. for authentication
557 using plain text passwords, set the SecurityFlags
558 to 0x30030):
549 559
550 may use packet signing 0x00001 560 may use packet signing 0x00001
551 must use packet signing 0x01001 561 must use packet signing 0x01001
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 83fd40dc1ef0..bd5f13d38450 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
294 294
295 if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) 295 if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
296 if (extended_security & CIFSSEC_MAY_PLNTXT) { 296 if (extended_security & CIFSSEC_MAY_PLNTXT) {
297 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
297 memcpy(lnm_session_key, password_with_pad, 298 memcpy(lnm_session_key, password_with_pad,
298 CIFS_ENCPWD_SIZE); 299 CIFS_ENCPWD_SIZE);
299 return; 300 return;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ff14d14903a0..cbefe1f1f9fe 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
833 return -EBADF; 833 return -EBADF;
834 open_file = (struct cifsFileInfo *) file->private_data; 834 open_file = (struct cifsFileInfo *) file->private_data;
835 835
836 rc = generic_write_checks(file, poffset, &write_size, 0);
837 if (rc)
838 return rc;
839
836 xid = GetXid(); 840 xid = GetXid();
837 841
838 if (*poffset > file->f_path.dentry->d_inode->i_size) 842 if (*poffset > file->f_path.dentry->d_inode->i_size)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index b537fad3bf50..252fdc0567f1 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
409#ifdef CONFIG_CIFS_WEAK_PW_HASH 409#ifdef CONFIG_CIFS_WEAK_PW_HASH
410 char lnm_session_key[CIFS_SESS_KEY_SIZE]; 410 char lnm_session_key[CIFS_SESS_KEY_SIZE];
411 411
412 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
413
412 /* no capabilities flags in old lanman negotiation */ 414 /* no capabilities flags in old lanman negotiation */
413 415
414 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 416 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
diff --git a/fs/dcache.c b/fs/dcache.c
index 80e93956aced..e7a1a99b7464 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1395 if (dentry->d_parent != parent) 1395 if (dentry->d_parent != parent)
1396 goto next; 1396 goto next;
1397 1397
1398 /* non-existing due to RCU? */
1399 if (d_unhashed(dentry))
1400 goto next;
1401
1398 /* 1402 /*
1399 * It is safe to compare names since d_move() cannot 1403 * It is safe to compare names since d_move() cannot
1400 * change the qstr (protected by d_lock). 1404 * change the qstr (protected by d_lock).
@@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1410 goto next; 1414 goto next;
1411 } 1415 }
1412 1416
1413 if (!d_unhashed(dentry)) { 1417 atomic_inc(&dentry->d_count);
1414 atomic_inc(&dentry->d_count); 1418 found = dentry;
1415 found = dentry;
1416 }
1417 spin_unlock(&dentry->d_lock); 1419 spin_unlock(&dentry->d_lock);
1418 break; 1420 break;
1419next: 1421next:
diff --git a/fs/exec.c b/fs/exec.c
index 32993beecbe9..cecee501ce78 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm)
752 tsk->active_mm = mm; 752 tsk->active_mm = mm;
753 activate_mm(active_mm, mm); 753 activate_mm(active_mm, mm);
754 task_unlock(tsk); 754 task_unlock(tsk);
755 mm_update_next_owner(old_mm);
756 arch_pick_mmap_layout(mm); 755 arch_pick_mmap_layout(mm);
757 if (old_mm) { 756 if (old_mm) {
758 up_read(&old_mm->mmap_sem); 757 up_read(&old_mm->mmap_sem);
759 BUG_ON(active_mm != old_mm); 758 BUG_ON(active_mm != old_mm);
759 mm_update_next_owner(old_mm);
760 mmput(old_mm); 760 mmput(old_mm);
761 return 0; 761 return 0;
762 } 762 }
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 302e95c4af7e..fb98b3d847ed 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -6,6 +6,7 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/msdos_fs.h> 8#include <linux/msdos_fs.h>
9#include <linux/blkdev.h>
9 10
10struct fatent_operations { 11struct fatent_operations {
11 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); 12 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
535 struct fat_entry fatent; 536 struct fat_entry fatent;
536 struct buffer_head *bhs[MAX_BUF_PER_PAGE]; 537 struct buffer_head *bhs[MAX_BUF_PER_PAGE];
537 int i, err, nr_bhs; 538 int i, err, nr_bhs;
539 int first_cl = cluster;
538 540
539 nr_bhs = 0; 541 nr_bhs = 0;
540 fatent_init(&fatent); 542 fatent_init(&fatent);
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster)
551 goto error; 553 goto error;
552 } 554 }
553 555
556 /*
557 * Issue discard for the sectors we no longer care about,
558 * batching contiguous clusters into one request
559 */
560 if (cluster != fatent.entry + 1) {
561 int nr_clus = fatent.entry - first_cl + 1;
562
563 sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
564 nr_clus * sbi->sec_per_clus);
565 first_cl = cluster;
566 }
567
554 ops->ent_put(&fatent, FAT_ENT_FREE); 568 ops->ent_put(&fatent, FAT_ENT_FREE);
555 if (sbi->free_clusters != -1) { 569 if (sbi->free_clusters != -1) {
556 sbi->free_clusters++; 570 sbi->free_clusters++;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 13391e546616..c962283d4e7f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1265 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 1265 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
1266 if (time_before(now, holdtime)) 1266 if (time_before(now, holdtime))
1267 delay = holdtime - now; 1267 delay = holdtime - now;
1268 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
1269 delay = gl->gl_ops->go_min_hold_time;
1268 1270
1269 spin_lock(&gl->gl_spin); 1271 spin_lock(&gl->gl_spin);
1270 handle_callback(gl, state, 1, delay); 1272 handle_callback(gl, state, 1, delay);
@@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
1578 *p++ = 'a'; 1580 *p++ = 'a';
1579 if (flags & GL_EXACT) 1581 if (flags & GL_EXACT)
1580 *p++ = 'E'; 1582 *p++ = 'E';
1581 if (flags & GL_ATIME)
1582 *p++ = 'a';
1583 if (flags & GL_NOCACHE) 1583 if (flags & GL_NOCACHE)
1584 *p++ = 'c'; 1584 *p++ = 'c';
1585 if (test_bit(HIF_HOLDER, &iflags)) 1585 if (test_bit(HIF_HOLDER, &iflags))
@@ -1816,15 +1816,17 @@ restart:
1816 if (gl) { 1816 if (gl) {
1817 gi->gl = hlist_entry(gl->gl_list.next, 1817 gi->gl = hlist_entry(gl->gl_list.next,
1818 struct gfs2_glock, gl_list); 1818 struct gfs2_glock, gl_list);
1819 if (gi->gl) 1819 } else {
1820 gfs2_glock_hold(gi->gl); 1820 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
1821 struct gfs2_glock, gl_list);
1821 } 1822 }
1823 if (gi->gl)
1824 gfs2_glock_hold(gi->gl);
1822 read_unlock(gl_lock_addr(gi->hash)); 1825 read_unlock(gl_lock_addr(gi->hash));
1823 if (gl) 1826 if (gl)
1824 gfs2_glock_put(gl); 1827 gfs2_glock_put(gl);
1825 if (gl && gi->gl == NULL)
1826 gi->hash++;
1827 while (gi->gl == NULL) { 1828 while (gi->gl == NULL) {
1829 gi->hash++;
1828 if (gi->hash >= GFS2_GL_HASH_SIZE) 1830 if (gi->hash >= GFS2_GL_HASH_SIZE)
1829 return 1; 1831 return 1;
1830 read_lock(gl_lock_addr(gi->hash)); 1832 read_lock(gl_lock_addr(gi->hash));
@@ -1833,7 +1835,6 @@ restart:
1833 if (gi->gl) 1835 if (gi->gl)
1834 gfs2_glock_hold(gi->gl); 1836 gfs2_glock_hold(gi->gl);
1835 read_unlock(gl_lock_addr(gi->hash)); 1837 read_unlock(gl_lock_addr(gi->hash));
1836 gi->hash++;
1837 } 1838 }
1838 1839
1839 if (gi->sdp != gi->gl->gl_sbd) 1840 if (gi->sdp != gi->gl->gl_sbd)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 971d92af70fc..695c6b193611 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -24,7 +24,6 @@
24#define GL_ASYNC 0x00000040 24#define GL_ASYNC 0x00000040
25#define GL_EXACT 0x00000080 25#define GL_EXACT 0x00000080
26#define GL_SKIP 0x00000100 26#define GL_SKIP 0x00000100
27#define GL_ATIME 0x00000200
28#define GL_NOCACHE 0x00000400 27#define GL_NOCACHE 0x00000400
29 28
30#define GLR_TRYFAILED 13 29#define GLR_TRYFAILED 13
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 448697a5c462..f566ec1b4e8e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -386,20 +386,21 @@ struct gfs2_statfs_change_host {
386#define GFS2_DATA_ORDERED 2 386#define GFS2_DATA_ORDERED 2
387 387
388struct gfs2_args { 388struct gfs2_args {
389 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ 389 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
390 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ 390 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
391 char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ 391 char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
392 int ar_spectator; /* Don't get a journal because we're always RO */ 392 unsigned int ar_spectator:1; /* Don't get a journal */
393 int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ 393 unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */
394 int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ 394 unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */
395 int ar_localcaching; /* Local-style caching (dangerous on multihost) */ 395 unsigned int ar_localcaching:1; /* Local caching */
396 int ar_debug; /* Oops on errors instead of trying to be graceful */ 396 unsigned int ar_debug:1; /* Oops on errors */
397 int ar_upgrade; /* Upgrade ondisk/multihost format */ 397 unsigned int ar_upgrade:1; /* Upgrade ondisk format */
398 unsigned int ar_num_glockd; /* Number of glockd threads */ 398 unsigned int ar_posix_acl:1; /* Enable posix acls */
399 int ar_posix_acl; /* Enable posix acls */ 399 unsigned int ar_quota:2; /* off/account/on */
400 int ar_quota; /* off/account/on */ 400 unsigned int ar_suiddir:1; /* suiddir support */
401 int ar_suiddir; /* suiddir support */ 401 unsigned int ar_data:2; /* ordered/writeback */
402 int ar_data; /* ordered/writeback */ 402 unsigned int ar_meta:1; /* mount metafs */
403 unsigned int ar_num_glockd; /* Number of glockd threads */
403}; 404};
404 405
405struct gfs2_tune { 406struct gfs2_tune {
@@ -419,7 +420,6 @@ struct gfs2_tune {
419 unsigned int gt_quota_scale_den; /* Denominator */ 420 unsigned int gt_quota_scale_den; /* Denominator */
420 unsigned int gt_quota_cache_secs; 421 unsigned int gt_quota_cache_secs;
421 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ 422 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
422 unsigned int gt_atime_quantum; /* Min secs between atime updates */
423 unsigned int gt_new_files_jdata; 423 unsigned int gt_new_files_jdata;
424 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ 424 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
425 unsigned int gt_stall_secs; /* Detects trouble! */ 425 unsigned int gt_stall_secs; /* Detects trouble! */
@@ -432,7 +432,7 @@ enum {
432 SDF_JOURNAL_CHECKED = 0, 432 SDF_JOURNAL_CHECKED = 0,
433 SDF_JOURNAL_LIVE = 1, 433 SDF_JOURNAL_LIVE = 1,
434 SDF_SHUTDOWN = 2, 434 SDF_SHUTDOWN = 2,
435 SDF_NOATIME = 3, 435 SDF_NOBARRIERS = 3,
436}; 436};
437 437
438#define GFS2_FSNAME_LEN 256 438#define GFS2_FSNAME_LEN 256
@@ -461,7 +461,6 @@ struct gfs2_sb_host {
461 461
462struct gfs2_sbd { 462struct gfs2_sbd {
463 struct super_block *sd_vfs; 463 struct super_block *sd_vfs;
464 struct super_block *sd_vfs_meta;
465 struct kobject sd_kobj; 464 struct kobject sd_kobj;
466 unsigned long sd_flags; /* SDF_... */ 465 unsigned long sd_flags; /* SDF_... */
467 struct gfs2_sb_host sd_sb; 466 struct gfs2_sb_host sd_sb;
@@ -499,7 +498,9 @@ struct gfs2_sbd {
499 498
500 /* Inode Stuff */ 499 /* Inode Stuff */
501 500
502 struct inode *sd_master_dir; 501 struct dentry *sd_master_dir;
502 struct dentry *sd_root_dir;
503
503 struct inode *sd_jindex; 504 struct inode *sd_jindex;
504 struct inode *sd_inum_inode; 505 struct inode *sd_inum_inode;
505 struct inode *sd_statfs_inode; 506 struct inode *sd_statfs_inode;
@@ -634,7 +635,6 @@ struct gfs2_sbd {
634 /* Debugging crud */ 635 /* Debugging crud */
635 636
636 unsigned long sd_last_warning; 637 unsigned long sd_last_warning;
637 struct vfsmount *sd_gfs2mnt;
638 struct dentry *debugfs_dir; /* debugfs directory */ 638 struct dentry *debugfs_dir; /* debugfs directory */
639 struct dentry *debugfs_dentry_glocks; /* for debugfs */ 639 struct dentry *debugfs_dentry_glocks; /* for debugfs */
640}; 640};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8b0806a32948..7cee695fa441 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -18,6 +18,7 @@
18#include <linux/crc32.h> 18#include <linux/crc32.h>
19#include <linux/lm_interface.h> 19#include <linux/lm_interface.h>
20#include <linux/security.h> 20#include <linux/security.h>
21#include <linux/time.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
249{ 250{
250 struct gfs2_dinode_host *di = &ip->i_di; 251 struct gfs2_dinode_host *di = &ip->i_di;
251 const struct gfs2_dinode *str = buf; 252 const struct gfs2_dinode *str = buf;
253 struct timespec atime;
252 u16 height, depth; 254 u16 height, depth;
253 255
254 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 256 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
@@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
275 di->di_size = be64_to_cpu(str->di_size); 277 di->di_size = be64_to_cpu(str->di_size);
276 i_size_write(&ip->i_inode, di->di_size); 278 i_size_write(&ip->i_inode, di->di_size);
277 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 279 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
278 ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); 280 atime.tv_sec = be64_to_cpu(str->di_atime);
279 ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 281 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
282 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
283 ip->i_inode.i_atime = atime;
280 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 284 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
281 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 285 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
282 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 286 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
@@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
1033 1037
1034 if (bh) 1038 if (bh)
1035 brelse(bh); 1039 brelse(bh);
1036 if (!inode)
1037 return ERR_PTR(-ENOMEM);
1038 return inode; 1040 return inode;
1039 1041
1040fail_gunlock2: 1042fail_gunlock2:
1041 gfs2_glock_dq_uninit(ghs + 1); 1043 gfs2_glock_dq_uninit(ghs + 1);
1042 if (inode) 1044 if (inode && !IS_ERR(inode))
1043 iput(inode); 1045 iput(inode);
1044fail_gunlock: 1046fail_gunlock:
1045 gfs2_glock_dq(ghs); 1047 gfs2_glock_dq(ghs);
@@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
1140 return 0; 1142 return 0;
1141} 1143}
1142 1144
1143/*
1144 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1145 * @this: move this
1146 * @to: to here
1147 *
1148 * Follow @to back to the root and make sure we don't encounter @this
1149 * Assumes we already hold the rename lock.
1150 *
1151 * Returns: errno
1152 */
1153
1154int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1155{
1156 struct inode *dir = &to->i_inode;
1157 struct super_block *sb = dir->i_sb;
1158 struct inode *tmp;
1159 struct qstr dotdot;
1160 int error = 0;
1161
1162 gfs2_str2qstr(&dotdot, "..");
1163
1164 igrab(dir);
1165
1166 for (;;) {
1167 if (dir == &this->i_inode) {
1168 error = -EINVAL;
1169 break;
1170 }
1171 if (dir == sb->s_root->d_inode) {
1172 error = 0;
1173 break;
1174 }
1175
1176 tmp = gfs2_lookupi(dir, &dotdot, 1);
1177 if (IS_ERR(tmp)) {
1178 error = PTR_ERR(tmp);
1179 break;
1180 }
1181
1182 iput(dir);
1183 dir = tmp;
1184 }
1185
1186 iput(dir);
1187
1188 return error;
1189}
1190
1191/** 1145/**
1192 * gfs2_readlinki - return the contents of a symlink 1146 * gfs2_readlinki - return the contents of a symlink
1193 * @ip: the symlink's inode 1147 * @ip: the symlink's inode
@@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1207 unsigned int x; 1161 unsigned int x;
1208 int error; 1162 int error;
1209 1163
1210 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); 1164 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
1211 error = gfs2_glock_nq_atime(&i_gh); 1165 error = gfs2_glock_nq(&i_gh);
1212 if (error) { 1166 if (error) {
1213 gfs2_holder_uninit(&i_gh); 1167 gfs2_holder_uninit(&i_gh);
1214 return error; 1168 return error;
@@ -1243,101 +1197,6 @@ out:
1243 return error; 1197 return error;
1244} 1198}
1245 1199
1246/**
1247 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
1248 * conditionally update the inode's atime
1249 * @gh: the holder to acquire
1250 *
1251 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
1252 * Update if the difference between the current time and the inode's current
1253 * atime is greater than an interval specified at mount.
1254 *
1255 * Returns: errno
1256 */
1257
1258int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1259{
1260 struct gfs2_glock *gl = gh->gh_gl;
1261 struct gfs2_sbd *sdp = gl->gl_sbd;
1262 struct gfs2_inode *ip = gl->gl_object;
1263 s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1264 unsigned int state;
1265 int flags;
1266 int error;
1267 struct timespec tv = CURRENT_TIME;
1268
1269 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1270 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
1271 gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
1272 return -EINVAL;
1273
1274 state = gh->gh_state;
1275 flags = gh->gh_flags;
1276
1277 error = gfs2_glock_nq(gh);
1278 if (error)
1279 return error;
1280
1281 if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
1282 (sdp->sd_vfs->s_flags & MS_RDONLY))
1283 return 0;
1284
1285 if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
1286 gfs2_glock_dq(gh);
1287 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
1288 gh);
1289 error = gfs2_glock_nq(gh);
1290 if (error)
1291 return error;
1292
1293 /* Verify that atime hasn't been updated while we were
1294 trying to get exclusive lock. */
1295
1296 tv = CURRENT_TIME;
1297 if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
1298 struct buffer_head *dibh;
1299 struct gfs2_dinode *di;
1300
1301 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1302 if (error == -EROFS)
1303 return 0;
1304 if (error)
1305 goto fail;
1306
1307 error = gfs2_meta_inode_buffer(ip, &dibh);
1308 if (error)
1309 goto fail_end_trans;
1310
1311 ip->i_inode.i_atime = tv;
1312
1313 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1314 di = (struct gfs2_dinode *)dibh->b_data;
1315 di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
1316 di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
1317 brelse(dibh);
1318
1319 gfs2_trans_end(sdp);
1320 }
1321
1322 /* If someone else has asked for the glock,
1323 unlock and let them have it. Then reacquire
1324 in the original state. */
1325 if (gfs2_glock_is_blocking(gl)) {
1326 gfs2_glock_dq(gh);
1327 gfs2_holder_reinit(state, flags, gh);
1328 return gfs2_glock_nq(gh);
1329 }
1330 }
1331
1332 return 0;
1333
1334fail_end_trans:
1335 gfs2_trans_end(sdp);
1336fail:
1337 gfs2_glock_dq(gh);
1338 return error;
1339}
1340
1341static int 1200static int
1342__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1201__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1343{ 1202{
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 58f9607d6a86..2d43f69610a0 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
91int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 91int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
92 const struct gfs2_inode *ip); 92 const struct gfs2_inode *ip);
93int gfs2_permission(struct inode *inode, int mask); 93int gfs2_permission(struct inode *inode, int mask);
94int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
95int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); 94int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
96int gfs2_glock_nq_atime(struct gfs2_holder *gh);
97int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 95int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
98struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 96struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
99void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 97void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6c6af9f5e3ab..ad305854bdc6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
18#include <linux/delay.h> 18#include <linux/delay.h>
19#include <linux/kthread.h> 19#include <linux/kthread.h>
20#include <linux/freezer.h> 20#include <linux/freezer.h>
21#include <linux/bio.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
584 memset(bh->b_data, 0, bh->b_size); 585 memset(bh->b_data, 0, bh->b_size);
585 set_buffer_uptodate(bh); 586 set_buffer_uptodate(bh);
586 clear_buffer_dirty(bh); 587 clear_buffer_dirty(bh);
587 unlock_buffer(bh);
588 588
589 gfs2_ail1_empty(sdp, 0); 589 gfs2_ail1_empty(sdp, 0);
590 tail = current_tail(sdp); 590 tail = current_tail(sdp);
@@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
601 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); 601 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
602 lh->lh_hash = cpu_to_be32(hash); 602 lh->lh_hash = cpu_to_be32(hash);
603 603
604 set_buffer_dirty(bh); 604 bh->b_end_io = end_buffer_write_sync;
605 if (sync_dirty_buffer(bh)) 605 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
606 goto skip_barrier;
607 get_bh(bh);
608 submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
609 wait_on_buffer(bh);
610 if (buffer_eopnotsupp(bh)) {
611 clear_buffer_eopnotsupp(bh);
612 set_buffer_uptodate(bh);
613 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
614 lock_buffer(bh);
615skip_barrier:
616 get_bh(bh);
617 submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
618 wait_on_buffer(bh);
619 }
620 if (!buffer_uptodate(bh))
606 gfs2_io_error_bh(sdp, bh); 621 gfs2_io_error_bh(sdp, bh);
607 brelse(bh); 622 brelse(bh);
608 623
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index b941f9f9f958..df48333e6f01 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -42,6 +42,7 @@ enum {
42 Opt_nosuiddir, 42 Opt_nosuiddir,
43 Opt_data_writeback, 43 Opt_data_writeback,
44 Opt_data_ordered, 44 Opt_data_ordered,
45 Opt_meta,
45 Opt_err, 46 Opt_err,
46}; 47};
47 48
@@ -66,6 +67,7 @@ static match_table_t tokens = {
66 {Opt_nosuiddir, "nosuiddir"}, 67 {Opt_nosuiddir, "nosuiddir"},
67 {Opt_data_writeback, "data=writeback"}, 68 {Opt_data_writeback, "data=writeback"},
68 {Opt_data_ordered, "data=ordered"}, 69 {Opt_data_ordered, "data=ordered"},
70 {Opt_meta, "meta"},
69 {Opt_err, NULL} 71 {Opt_err, NULL}
70}; 72};
71 73
@@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
239 case Opt_data_ordered: 241 case Opt_data_ordered:
240 args->ar_data = GFS2_DATA_ORDERED; 242 args->ar_data = GFS2_DATA_ORDERED;
241 break; 243 break;
244 case Opt_meta:
245 if (remount && args->ar_meta != 1)
246 goto cant_remount;
247 args->ar_meta = 1;
248 break;
242 case Opt_err: 249 case Opt_err:
243 default: 250 default:
244 fs_info(sdp, "unknown option: %s\n", o); 251 fs_info(sdp, "unknown option: %s\n", o);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index e64a1b04117a..27563816e1c5 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page)
512 int error; 512 int error;
513 513
514 unlock_page(page); 514 unlock_page(page);
515 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 515 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
516 error = gfs2_glock_nq_atime(&gh); 516 error = gfs2_glock_nq(&gh);
517 if (unlikely(error)) 517 if (unlikely(error))
518 goto out; 518 goto out;
519 error = AOP_TRUNCATED_PAGE; 519 error = AOP_TRUNCATED_PAGE;
@@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
594 struct gfs2_holder gh; 594 struct gfs2_holder gh;
595 int ret; 595 int ret;
596 596
597 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 597 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
598 ret = gfs2_glock_nq_atime(&gh); 598 ret = gfs2_glock_nq(&gh);
599 if (unlikely(ret)) 599 if (unlikely(ret))
600 goto out_uninit; 600 goto out_uninit;
601 if (!gfs2_is_stuffed(ip)) 601 if (!gfs2_is_stuffed(ip))
@@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
636 unsigned to = from + len; 636 unsigned to = from + len;
637 struct page *page; 637 struct page *page;
638 638
639 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); 639 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
640 error = gfs2_glock_nq_atime(&ip->i_gh); 640 error = gfs2_glock_nq(&ip->i_gh);
641 if (unlikely(error)) 641 if (unlikely(error))
642 goto out_uninit; 642 goto out_uninit;
643 643
@@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
975 if (gfs2_is_stuffed(ip)) 975 if (gfs2_is_stuffed(ip))
976 return 0; 976 return 0;
977 977
978 if (offset > i_size_read(&ip->i_inode)) 978 if (offset >= i_size_read(&ip->i_inode))
979 return 0; 979 return 0;
980 return 1; 980 return 1;
981} 981}
@@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1000 * unfortunately have the option of only flushing a range like 1000 * unfortunately have the option of only flushing a range like
1001 * the VFS does. 1001 * the VFS does.
1002 */ 1002 */
1003 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); 1003 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
1004 rv = gfs2_glock_nq_atime(&gh); 1004 rv = gfs2_glock_nq(&gh);
1005 if (rv) 1005 if (rv)
1006 return rv; 1006 return rv;
1007 rv = gfs2_ok_for_dio(ip, rw, offset); 1007 rv = gfs2_ok_for_dio(ip, rw, offset);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index e9a366d4411c..3a747f8e2188 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
89 u64 offset = file->f_pos; 89 u64 offset = file->f_pos;
90 int error; 90 int error;
91 91
92 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); 92 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
93 error = gfs2_glock_nq_atime(&d_gh); 93 error = gfs2_glock_nq(&d_gh);
94 if (error) { 94 if (error) {
95 gfs2_holder_uninit(&d_gh); 95 gfs2_holder_uninit(&d_gh);
96 return error; 96 return error;
@@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
153 int error; 153 int error;
154 u32 fsflags; 154 u32 fsflags;
155 155
156 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); 156 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
157 error = gfs2_glock_nq_atime(&gh); 157 error = gfs2_glock_nq(&gh);
158 if (error) 158 if (error)
159 return error; 159 return error;
160 160
@@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
351 struct gfs2_alloc *al; 351 struct gfs2_alloc *al;
352 int ret; 352 int ret;
353 353
354 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); 354 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
355 ret = gfs2_glock_nq_atime(&gh); 355 ret = gfs2_glock_nq(&gh);
356 if (ret) 356 if (ret)
357 goto out; 357 goto out;
358 358
@@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
434 struct gfs2_holder i_gh; 434 struct gfs2_holder i_gh;
435 int error; 435 int error;
436 436
437 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); 437 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
438 error = gfs2_glock_nq_atime(&i_gh); 438 error = gfs2_glock_nq(&i_gh);
439 if (error) { 439 if (error) {
440 gfs2_holder_uninit(&i_gh); 440 gfs2_holder_uninit(&i_gh);
441 return error; 441 return error;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b4d1d6490633..b117fcf2c4f5 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -40,6 +40,44 @@
40#define DO 0 40#define DO 0
41#define UNDO 1 41#define UNDO 1
42 42
43static const u32 gfs2_old_fs_formats[] = {
44 0
45};
46
47static const u32 gfs2_old_multihost_formats[] = {
48 0
49};
50
51/**
52 * gfs2_tune_init - Fill a gfs2_tune structure with default values
53 * @gt: tune
54 *
55 */
56
57static void gfs2_tune_init(struct gfs2_tune *gt)
58{
59 spin_lock_init(&gt->gt_spin);
60
61 gt->gt_demote_secs = 300;
62 gt->gt_incore_log_blocks = 1024;
63 gt->gt_log_flush_secs = 60;
64 gt->gt_recoverd_secs = 60;
65 gt->gt_logd_secs = 1;
66 gt->gt_quotad_secs = 5;
67 gt->gt_quota_simul_sync = 64;
68 gt->gt_quota_warn_period = 10;
69 gt->gt_quota_scale_num = 1;
70 gt->gt_quota_scale_den = 1;
71 gt->gt_quota_cache_secs = 300;
72 gt->gt_quota_quantum = 60;
73 gt->gt_new_files_jdata = 0;
74 gt->gt_max_readahead = 1 << 18;
75 gt->gt_stall_secs = 600;
76 gt->gt_complain_secs = 10;
77 gt->gt_statfs_quantum = 30;
78 gt->gt_statfs_slow = 0;
79}
80
43static struct gfs2_sbd *init_sbd(struct super_block *sb) 81static struct gfs2_sbd *init_sbd(struct super_block *sb)
44{ 82{
45 struct gfs2_sbd *sdp; 83 struct gfs2_sbd *sdp;
@@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
96 return sdp; 134 return sdp;
97} 135}
98 136
99static void init_vfs(struct super_block *sb, unsigned noatime) 137
138/**
139 * gfs2_check_sb - Check superblock
140 * @sdp: the filesystem
141 * @sb: The superblock
142 * @silent: Don't print a message if the check fails
143 *
144 * Checks the version code of the FS is one that we understand how to
145 * read and that the sizes of the various on-disk structures have not
146 * changed.
147 */
148
149static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
100{ 150{
101 struct gfs2_sbd *sdp = sb->s_fs_info; 151 unsigned int x;
102 152
103 sb->s_magic = GFS2_MAGIC; 153 if (sb->sb_magic != GFS2_MAGIC ||
104 sb->s_op = &gfs2_super_ops; 154 sb->sb_type != GFS2_METATYPE_SB) {
105 sb->s_export_op = &gfs2_export_ops; 155 if (!silent)
106 sb->s_time_gran = 1; 156 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
107 sb->s_maxbytes = MAX_LFS_FILESIZE; 157 return -EINVAL;
158 }
159
160 /* If format numbers match exactly, we're done. */
161
162 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
163 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
164 return 0;
165
166 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
167 for (x = 0; gfs2_old_fs_formats[x]; x++)
168 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
169 break;
170
171 if (!gfs2_old_fs_formats[x]) {
172 printk(KERN_WARNING
173 "GFS2: code version (%u, %u) is incompatible "
174 "with ondisk format (%u, %u)\n",
175 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
176 sb->sb_fs_format, sb->sb_multihost_format);
177 printk(KERN_WARNING
178 "GFS2: I don't know how to upgrade this FS\n");
179 return -EINVAL;
180 }
181 }
182
183 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
184 for (x = 0; gfs2_old_multihost_formats[x]; x++)
185 if (gfs2_old_multihost_formats[x] ==
186 sb->sb_multihost_format)
187 break;
188
189 if (!gfs2_old_multihost_formats[x]) {
190 printk(KERN_WARNING
191 "GFS2: code version (%u, %u) is incompatible "
192 "with ondisk format (%u, %u)\n",
193 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
194 sb->sb_fs_format, sb->sb_multihost_format);
195 printk(KERN_WARNING
196 "GFS2: I don't know how to upgrade this FS\n");
197 return -EINVAL;
198 }
199 }
200
201 if (!sdp->sd_args.ar_upgrade) {
202 printk(KERN_WARNING
203 "GFS2: code version (%u, %u) is incompatible "
204 "with ondisk format (%u, %u)\n",
205 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
206 sb->sb_fs_format, sb->sb_multihost_format);
207 printk(KERN_INFO
208 "GFS2: Use the \"upgrade\" mount option to upgrade "
209 "the FS\n");
210 printk(KERN_INFO "GFS2: See the manual for more details\n");
211 return -EINVAL;
212 }
213
214 return 0;
215}
216
217static void end_bio_io_page(struct bio *bio, int error)
218{
219 struct page *page = bio->bi_private;
108 220
109 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) 221 if (!error)
110 set_bit(noatime, &sdp->sd_flags); 222 SetPageUptodate(page);
223 else
224 printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
225 unlock_page(page);
226}
227
228static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
229{
230 const struct gfs2_sb *str = buf;
231
232 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
233 sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
234 sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
235 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
236 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
237 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
238 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
239 sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
240 sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
241 sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
242 sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
243
244 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
245 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
246}
247
248/**
249 * gfs2_read_super - Read the gfs2 super block from disk
250 * @sdp: The GFS2 super block
251 * @sector: The location of the super block
252 * @error: The error code to return
253 *
254 * This uses the bio functions to read the super block from disk
255 * because we want to be 100% sure that we never read cached data.
256 * A super block is read twice only during each GFS2 mount and is
257 * never written to by the filesystem. The first time its read no
258 * locks are held, and the only details which are looked at are those
259 * relating to the locking protocol. Once locking is up and working,
260 * the sb is read again under the lock to establish the location of
261 * the master directory (contains pointers to journals etc) and the
262 * root directory.
263 *
264 * Returns: 0 on success or error
265 */
266
267static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
268{
269 struct super_block *sb = sdp->sd_vfs;
270 struct gfs2_sb *p;
271 struct page *page;
272 struct bio *bio;
273
274 page = alloc_page(GFP_NOFS);
275 if (unlikely(!page))
276 return -ENOBUFS;
277
278 ClearPageUptodate(page);
279 ClearPageDirty(page);
280 lock_page(page);
281
282 bio = bio_alloc(GFP_NOFS, 1);
283 if (unlikely(!bio)) {
284 __free_page(page);
285 return -ENOBUFS;
286 }
111 287
112 /* Don't let the VFS update atimes. GFS2 handles this itself. */ 288 bio->bi_sector = sector * (sb->s_blocksize >> 9);
113 sb->s_flags |= MS_NOATIME | MS_NODIRATIME; 289 bio->bi_bdev = sb->s_bdev;
290 bio_add_page(bio, page, PAGE_SIZE, 0);
291
292 bio->bi_end_io = end_bio_io_page;
293 bio->bi_private = page;
294 submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
295 wait_on_page_locked(page);
296 bio_put(bio);
297 if (!PageUptodate(page)) {
298 __free_page(page);
299 return -EIO;
300 }
301 p = kmap(page);
302 gfs2_sb_in(&sdp->sd_sb, p);
303 kunmap(page);
304 __free_page(page);
305 return 0;
306}
307/**
308 * gfs2_read_sb - Read super block
309 * @sdp: The GFS2 superblock
310 * @gl: the glock for the superblock (assumed to be held)
311 * @silent: Don't print message if mount fails
312 *
313 */
314
315static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
316{
317 u32 hash_blocks, ind_blocks, leaf_blocks;
318 u32 tmp_blocks;
319 unsigned int x;
320 int error;
321
322 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
323 if (error) {
324 if (!silent)
325 fs_err(sdp, "can't read superblock\n");
326 return error;
327 }
328
329 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
330 if (error)
331 return error;
332
333 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
334 GFS2_BASIC_BLOCK_SHIFT;
335 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
336 sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
337 sizeof(struct gfs2_dinode)) / sizeof(u64);
338 sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
339 sizeof(struct gfs2_meta_header)) / sizeof(u64);
340 sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
341 sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
342 sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
343 sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
344 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
345 sizeof(struct gfs2_meta_header)) /
346 sizeof(struct gfs2_quota_change);
347
348 /* Compute maximum reservation required to add a entry to a directory */
349
350 hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
351 sdp->sd_jbsize);
352
353 ind_blocks = 0;
354 for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
355 tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
356 ind_blocks += tmp_blocks;
357 }
358
359 leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
360
361 sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
362
363 sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
364 sizeof(struct gfs2_dinode);
365 sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
366 for (x = 2;; x++) {
367 u64 space, d;
368 u32 m;
369
370 space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
371 d = space;
372 m = do_div(d, sdp->sd_inptrs);
373
374 if (d != sdp->sd_heightsize[x - 1] || m)
375 break;
376 sdp->sd_heightsize[x] = space;
377 }
378 sdp->sd_max_height = x;
379 sdp->sd_heightsize[x] = ~0;
380 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
381
382 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
383 sizeof(struct gfs2_dinode);
384 sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
385 for (x = 2;; x++) {
386 u64 space, d;
387 u32 m;
388
389 space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
390 d = space;
391 m = do_div(d, sdp->sd_inptrs);
392
393 if (d != sdp->sd_jheightsize[x - 1] || m)
394 break;
395 sdp->sd_jheightsize[x] = space;
396 }
397 sdp->sd_max_jheight = x;
398 sdp->sd_jheightsize[x] = ~0;
399 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
400
401 return 0;
114} 402}
115 403
116static int init_names(struct gfs2_sbd *sdp, int silent) 404static int init_names(struct gfs2_sbd *sdp, int silent)
@@ -224,51 +512,59 @@ fail:
224 return error; 512 return error;
225} 513}
226 514
227static inline struct inode *gfs2_lookup_root(struct super_block *sb, 515static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
228 u64 no_addr) 516 u64 no_addr, const char *name)
229{ 517{
230 return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); 518 struct gfs2_sbd *sdp = sb->s_fs_info;
519 struct dentry *dentry;
520 struct inode *inode;
521
522 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
523 if (IS_ERR(inode)) {
524 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
525 return PTR_ERR(inode);
526 }
527 dentry = d_alloc_root(inode);
528 if (!dentry) {
529 fs_err(sdp, "can't alloc %s dentry\n", name);
530 iput(inode);
531 return -ENOMEM;
532 }
533 dentry->d_op = &gfs2_dops;
534 *dptr = dentry;
535 return 0;
231} 536}
232 537
233static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) 538static int init_sb(struct gfs2_sbd *sdp, int silent)
234{ 539{
235 struct super_block *sb = sdp->sd_vfs; 540 struct super_block *sb = sdp->sd_vfs;
236 struct gfs2_holder sb_gh; 541 struct gfs2_holder sb_gh;
237 u64 no_addr; 542 u64 no_addr;
238 struct inode *inode; 543 int ret;
239 int error = 0;
240 544
241 if (undo) { 545 ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
242 if (sb->s_root) { 546 LM_ST_SHARED, 0, &sb_gh);
243 dput(sb->s_root); 547 if (ret) {
244 sb->s_root = NULL; 548 fs_err(sdp, "can't acquire superblock glock: %d\n", ret);
245 } 549 return ret;
246 return 0;
247 } 550 }
248 551
249 error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, 552 ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
250 LM_ST_SHARED, 0, &sb_gh); 553 if (ret) {
251 if (error) { 554 fs_err(sdp, "can't read superblock: %d\n", ret);
252 fs_err(sdp, "can't acquire superblock glock: %d\n", error);
253 return error;
254 }
255
256 error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
257 if (error) {
258 fs_err(sdp, "can't read superblock: %d\n", error);
259 goto out; 555 goto out;
260 } 556 }
261 557
262 /* Set up the buffer cache and SB for real */ 558 /* Set up the buffer cache and SB for real */
263 if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { 559 if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
264 error = -EINVAL; 560 ret = -EINVAL;
265 fs_err(sdp, "FS block size (%u) is too small for device " 561 fs_err(sdp, "FS block size (%u) is too small for device "
266 "block size (%u)\n", 562 "block size (%u)\n",
267 sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); 563 sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
268 goto out; 564 goto out;
269 } 565 }
270 if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { 566 if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
271 error = -EINVAL; 567 ret = -EINVAL;
272 fs_err(sdp, "FS block size (%u) is too big for machine " 568 fs_err(sdp, "FS block size (%u) is too big for machine "
273 "page size (%u)\n", 569 "page size (%u)\n",
274 sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); 570 sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
@@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
278 574
279 /* Get the root inode */ 575 /* Get the root inode */
280 no_addr = sdp->sd_sb.sb_root_dir.no_addr; 576 no_addr = sdp->sd_sb.sb_root_dir.no_addr;
281 if (sb->s_type == &gfs2meta_fs_type) 577 ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root");
282 no_addr = sdp->sd_sb.sb_master_dir.no_addr; 578 if (ret)
283 inode = gfs2_lookup_root(sb, no_addr);
284 if (IS_ERR(inode)) {
285 error = PTR_ERR(inode);
286 fs_err(sdp, "can't read in root inode: %d\n", error);
287 goto out; 579 goto out;
288 }
289 580
290 sb->s_root = d_alloc_root(inode); 581 /* Get the master inode */
291 if (!sb->s_root) { 582 no_addr = sdp->sd_sb.sb_master_dir.no_addr;
292 fs_err(sdp, "can't get root dentry\n"); 583 ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master");
293 error = -ENOMEM; 584 if (ret) {
294 iput(inode); 585 dput(sdp->sd_root_dir);
295 } else 586 goto out;
296 sb->s_root->d_op = &gfs2_dops; 587 }
297 588 sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir);
298out: 589out:
299 gfs2_glock_dq_uninit(&sb_gh); 590 gfs2_glock_dq_uninit(&sb_gh);
300 return error; 591 return ret;
301} 592}
302 593
303/** 594/**
@@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
372 663
373static int init_journal(struct gfs2_sbd *sdp, int undo) 664static int init_journal(struct gfs2_sbd *sdp, int undo)
374{ 665{
666 struct inode *master = sdp->sd_master_dir->d_inode;
375 struct gfs2_holder ji_gh; 667 struct gfs2_holder ji_gh;
376 struct task_struct *p; 668 struct task_struct *p;
377 struct gfs2_inode *ip; 669 struct gfs2_inode *ip;
@@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
383 goto fail_recoverd; 675 goto fail_recoverd;
384 } 676 }
385 677
386 sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); 678 sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
387 if (IS_ERR(sdp->sd_jindex)) { 679 if (IS_ERR(sdp->sd_jindex)) {
388 fs_err(sdp, "can't lookup journal index: %d\n", error); 680 fs_err(sdp, "can't lookup journal index: %d\n", error);
389 return PTR_ERR(sdp->sd_jindex); 681 return PTR_ERR(sdp->sd_jindex);
@@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
506{ 798{
507 int error = 0; 799 int error = 0;
508 struct gfs2_inode *ip; 800 struct gfs2_inode *ip;
509 struct inode *inode; 801 struct inode *master = sdp->sd_master_dir->d_inode;
510 802
511 if (undo) 803 if (undo)
512 goto fail_qinode; 804 goto fail_qinode;
513 805
514 inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
515 if (IS_ERR(inode)) {
516 error = PTR_ERR(inode);
517 fs_err(sdp, "can't read in master directory: %d\n", error);
518 goto fail;
519 }
520 sdp->sd_master_dir = inode;
521
522 error = init_journal(sdp, undo); 806 error = init_journal(sdp, undo);
523 if (error) 807 if (error)
524 goto fail_master; 808 goto fail;
525 809
526 /* Read in the master inode number inode */ 810 /* Read in the master inode number inode */
527 sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); 811 sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum");
528 if (IS_ERR(sdp->sd_inum_inode)) { 812 if (IS_ERR(sdp->sd_inum_inode)) {
529 error = PTR_ERR(sdp->sd_inum_inode); 813 error = PTR_ERR(sdp->sd_inum_inode);
530 fs_err(sdp, "can't read in inum inode: %d\n", error); 814 fs_err(sdp, "can't read in inum inode: %d\n", error);
@@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
533 817
534 818
535 /* Read in the master statfs inode */ 819 /* Read in the master statfs inode */
536 sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); 820 sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
537 if (IS_ERR(sdp->sd_statfs_inode)) { 821 if (IS_ERR(sdp->sd_statfs_inode)) {
538 error = PTR_ERR(sdp->sd_statfs_inode); 822 error = PTR_ERR(sdp->sd_statfs_inode);
539 fs_err(sdp, "can't read in statfs inode: %d\n", error); 823 fs_err(sdp, "can't read in statfs inode: %d\n", error);
@@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
541 } 825 }
542 826
543 /* Read in the resource index inode */ 827 /* Read in the resource index inode */
544 sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); 828 sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
545 if (IS_ERR(sdp->sd_rindex)) { 829 if (IS_ERR(sdp->sd_rindex)) {
546 error = PTR_ERR(sdp->sd_rindex); 830 error = PTR_ERR(sdp->sd_rindex);
547 fs_err(sdp, "can't get resource index inode: %d\n", error); 831 fs_err(sdp, "can't get resource index inode: %d\n", error);
@@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
552 sdp->sd_rindex_uptodate = 0; 836 sdp->sd_rindex_uptodate = 0;
553 837
554 /* Read in the quota inode */ 838 /* Read in the quota inode */
555 sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); 839 sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota");
556 if (IS_ERR(sdp->sd_quota_inode)) { 840 if (IS_ERR(sdp->sd_quota_inode)) {
557 error = PTR_ERR(sdp->sd_quota_inode); 841 error = PTR_ERR(sdp->sd_quota_inode);
558 fs_err(sdp, "can't get quota file inode: %d\n", error); 842 fs_err(sdp, "can't get quota file inode: %d\n", error);
@@ -571,8 +855,6 @@ fail_inum:
571 iput(sdp->sd_inum_inode); 855 iput(sdp->sd_inum_inode);
572fail_journal: 856fail_journal:
573 init_journal(sdp, UNDO); 857 init_journal(sdp, UNDO);
574fail_master:
575 iput(sdp->sd_master_dir);
576fail: 858fail:
577 return error; 859 return error;
578} 860}
@@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
583 char buf[30]; 865 char buf[30];
584 int error = 0; 866 int error = 0;
585 struct gfs2_inode *ip; 867 struct gfs2_inode *ip;
868 struct inode *master = sdp->sd_master_dir->d_inode;
586 869
587 if (sdp->sd_args.ar_spectator) 870 if (sdp->sd_args.ar_spectator)
588 return 0; 871 return 0;
@@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
590 if (undo) 873 if (undo)
591 goto fail_qc_gh; 874 goto fail_qc_gh;
592 875
593 pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); 876 pn = gfs2_lookup_simple(master, "per_node");
594 if (IS_ERR(pn)) { 877 if (IS_ERR(pn)) {
595 error = PTR_ERR(pn); 878 error = PTR_ERR(pn);
596 fs_err(sdp, "can't find per_node directory: %d\n", error); 879 fs_err(sdp, "can't find per_node directory: %d\n", error);
@@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent)
800 goto fail; 1083 goto fail;
801 } 1084 }
802 1085
803 init_vfs(sb, SDF_NOATIME); 1086 sb->s_magic = GFS2_MAGIC;
1087 sb->s_op = &gfs2_super_ops;
1088 sb->s_export_op = &gfs2_export_ops;
1089 sb->s_time_gran = 1;
1090 sb->s_maxbytes = MAX_LFS_FILESIZE;
804 1091
805 /* Set up the buffer cache and fill in some fake block size values 1092 /* Set up the buffer cache and fill in some fake block size values
806 to allow us to read-in the on-disk superblock. */ 1093 to allow us to read-in the on-disk superblock. */
@@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
828 if (error) 1115 if (error)
829 goto fail_lm; 1116 goto fail_lm;
830 1117
831 error = init_sb(sdp, silent, DO); 1118 error = init_sb(sdp, silent);
832 if (error) 1119 if (error)
833 goto fail_locking; 1120 goto fail_locking;
834 1121
@@ -869,7 +1156,11 @@ fail_per_node:
869fail_inodes: 1156fail_inodes:
870 init_inodes(sdp, UNDO); 1157 init_inodes(sdp, UNDO);
871fail_sb: 1158fail_sb:
872 init_sb(sdp, 0, UNDO); 1159 if (sdp->sd_root_dir)
1160 dput(sdp->sd_root_dir);
1161 if (sdp->sd_master_dir)
1162 dput(sdp->sd_master_dir);
1163 sb->s_root = NULL;
873fail_locking: 1164fail_locking:
874 init_locking(sdp, &mount_gh, UNDO); 1165 init_locking(sdp, &mount_gh, UNDO);
875fail_lm: 1166fail_lm:
@@ -887,151 +1178,63 @@ fail:
887} 1178}
888 1179
889static int gfs2_get_sb(struct file_system_type *fs_type, int flags, 1180static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
890 const char *dev_name, void *data, struct vfsmount *mnt) 1181 const char *dev_name, void *data, struct vfsmount *mnt)
891{ 1182{
892 struct super_block *sb; 1183 return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
893 struct gfs2_sbd *sdp;
894 int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
895 if (error)
896 goto out;
897 sb = mnt->mnt_sb;
898 sdp = sb->s_fs_info;
899 sdp->sd_gfs2mnt = mnt;
900out:
901 return error;
902} 1184}
903 1185
904static int fill_super_meta(struct super_block *sb, struct super_block *new, 1186static struct super_block *get_gfs2_sb(const char *dev_name)
905 void *data, int silent)
906{ 1187{
907 struct gfs2_sbd *sdp = sb->s_fs_info; 1188 struct super_block *sb;
908 struct inode *inode;
909 int error = 0;
910
911 new->s_fs_info = sdp;
912 sdp->sd_vfs_meta = sb;
913
914 init_vfs(new, SDF_NOATIME);
915
916 /* Get the master inode */
917 inode = igrab(sdp->sd_master_dir);
918
919 new->s_root = d_alloc_root(inode);
920 if (!new->s_root) {
921 fs_err(sdp, "can't get root dentry\n");
922 error = -ENOMEM;
923 iput(inode);
924 } else
925 new->s_root->d_op = &gfs2_dops;
926
927 return error;
928}
929
930static int set_bdev_super(struct super_block *s, void *data)
931{
932 s->s_bdev = data;
933 s->s_dev = s->s_bdev->bd_dev;
934 return 0;
935}
936
937static int test_bdev_super(struct super_block *s, void *data)
938{
939 return s->s_bdev == data;
940}
941
942static struct super_block* get_gfs2_sb(const char *dev_name)
943{
944 struct kstat stat;
945 struct nameidata nd; 1189 struct nameidata nd;
946 struct super_block *sb = NULL, *s;
947 int error; 1190 int error;
948 1191
949 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); 1192 error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
950 if (error) { 1193 if (error) {
951 printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", 1194 printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
952 dev_name); 1195 dev_name, error);
953 goto out; 1196 return NULL;
954 }
955 error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat);
956
957 list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) {
958 if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
959 (S_ISDIR(stat.mode) &&
960 s == nd.path.dentry->d_inode->i_sb)) {
961 sb = s;
962 goto free_nd;
963 }
964 } 1197 }
965 1198 sb = nd.path.dentry->d_inode->i_sb;
966 printk(KERN_WARNING "GFS2: Unrecognized block device or " 1199 if (sb && (sb->s_type == &gfs2_fs_type))
967 "mount point %s\n", dev_name); 1200 atomic_inc(&sb->s_active);
968 1201 else
969free_nd: 1202 sb = NULL;
970 path_put(&nd.path); 1203 path_put(&nd.path);
971out:
972 return sb; 1204 return sb;
973} 1205}
974 1206
975static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, 1207static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
976 const char *dev_name, void *data, struct vfsmount *mnt) 1208 const char *dev_name, void *data, struct vfsmount *mnt)
977{ 1209{
978 int error = 0; 1210 struct super_block *sb = NULL;
979 struct super_block *sb = NULL, *new;
980 struct gfs2_sbd *sdp; 1211 struct gfs2_sbd *sdp;
981 1212
982 sb = get_gfs2_sb(dev_name); 1213 sb = get_gfs2_sb(dev_name);
983 if (!sb) { 1214 if (!sb) {
984 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); 1215 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
985 error = -ENOENT; 1216 return -ENOENT;
986 goto error;
987 } 1217 }
988 sdp = sb->s_fs_info; 1218 sdp = sb->s_fs_info;
989 if (sdp->sd_vfs_meta) { 1219 mnt->mnt_sb = sb;
990 printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); 1220 mnt->mnt_root = dget(sdp->sd_master_dir);
991 error = -EBUSY; 1221 return 0;
992 goto error;
993 }
994 down(&sb->s_bdev->bd_mount_sem);
995 new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
996 up(&sb->s_bdev->bd_mount_sem);
997 if (IS_ERR(new)) {
998 error = PTR_ERR(new);
999 goto error;
1000 }
1001 new->s_flags = flags;
1002 strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
1003 sb_set_blocksize(new, sb->s_blocksize);
1004 error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0);
1005 if (error) {
1006 up_write(&new->s_umount);
1007 deactivate_super(new);
1008 goto error;
1009 }
1010
1011 new->s_flags |= MS_ACTIVE;
1012
1013 /* Grab a reference to the gfs2 mount point */
1014 atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
1015 return simple_set_mnt(mnt, new);
1016error:
1017 return error;
1018} 1222}
1019 1223
1020static void gfs2_kill_sb(struct super_block *sb) 1224static void gfs2_kill_sb(struct super_block *sb)
1021{ 1225{
1022 if (sb->s_fs_info) { 1226 struct gfs2_sbd *sdp = sb->s_fs_info;
1023 gfs2_delete_debugfs_file(sb->s_fs_info); 1227 if (sdp) {
1024 gfs2_meta_syncfs(sb->s_fs_info); 1228 gfs2_meta_syncfs(sdp);
1229 dput(sdp->sd_root_dir);
1230 dput(sdp->sd_master_dir);
1231 sdp->sd_root_dir = NULL;
1232 sdp->sd_master_dir = NULL;
1025 } 1233 }
1234 shrink_dcache_sb(sb);
1026 kill_block_super(sb); 1235 kill_block_super(sb);
1027} 1236 if (sdp)
1028 1237 gfs2_delete_debugfs_file(sdp);
1029static void gfs2_kill_sb_meta(struct super_block *sb)
1030{
1031 struct gfs2_sbd *sdp = sb->s_fs_info;
1032 generic_shutdown_super(sb);
1033 sdp->sd_vfs_meta = NULL;
1034 atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
1035} 1238}
1036 1239
1037struct file_system_type gfs2_fs_type = { 1240struct file_system_type gfs2_fs_type = {
@@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = {
1046 .name = "gfs2meta", 1249 .name = "gfs2meta",
1047 .fs_flags = FS_REQUIRES_DEV, 1250 .fs_flags = FS_REQUIRES_DEV,
1048 .get_sb = gfs2_get_sb_meta, 1251 .get_sb = gfs2_get_sb_meta,
1049 .kill_sb = gfs2_kill_sb_meta,
1050 .owner = THIS_MODULE, 1252 .owner = THIS_MODULE,
1051}; 1253};
1052 1254
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index e2c62f73a778..534e1e2c65ca 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
159 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 159 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
160 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 160 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
161 161
162 error = gfs2_glock_nq_m(2, ghs); 162 error = gfs2_glock_nq(ghs); /* parent */
163 if (error) 163 if (error)
164 goto out; 164 goto out_parent;
165
166 error = gfs2_glock_nq(ghs + 1); /* child */
167 if (error)
168 goto out_child;
165 169
166 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); 170 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
167 if (error) 171 if (error)
@@ -245,8 +249,10 @@ out_alloc:
245 if (alloc_required) 249 if (alloc_required)
246 gfs2_alloc_put(dip); 250 gfs2_alloc_put(dip);
247out_gunlock: 251out_gunlock:
248 gfs2_glock_dq_m(2, ghs); 252 gfs2_glock_dq(ghs + 1);
249out: 253out_child:
254 gfs2_glock_dq(ghs);
255out_parent:
250 gfs2_holder_uninit(ghs); 256 gfs2_holder_uninit(ghs);
251 gfs2_holder_uninit(ghs + 1); 257 gfs2_holder_uninit(ghs + 1);
252 if (!error) { 258 if (!error) {
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
302 308
303 error = gfs2_unlink_ok(dip, &dentry->d_name, ip); 309 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
304 if (error) 310 if (error)
305 goto out_rgrp; 311 goto out_gunlock;
306 312
307 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); 313 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
308 if (error) 314 if (error)
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
316 322
317out_end_trans: 323out_end_trans:
318 gfs2_trans_end(sdp); 324 gfs2_trans_end(sdp);
325out_gunlock:
319 gfs2_glock_dq(ghs + 2); 326 gfs2_glock_dq(ghs + 2);
320out_rgrp: 327out_rgrp:
321 gfs2_holder_uninit(ghs + 2); 328 gfs2_holder_uninit(ghs + 2);
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
485 struct gfs2_holder ri_gh; 492 struct gfs2_holder ri_gh;
486 int error; 493 int error;
487 494
488
489 error = gfs2_rindex_hold(sdp, &ri_gh); 495 error = gfs2_rindex_hold(sdp, &ri_gh);
490 if (error) 496 if (error)
491 return error; 497 return error;
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
495 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 501 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
496 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 502 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
497 503
498 error = gfs2_glock_nq_m(3, ghs); 504 error = gfs2_glock_nq(ghs); /* parent */
499 if (error) 505 if (error)
500 goto out; 506 goto out_parent;
507
508 error = gfs2_glock_nq(ghs + 1); /* child */
509 if (error)
510 goto out_child;
511
512 error = gfs2_glock_nq(ghs + 2); /* rgrp */
513 if (error)
514 goto out_rgrp;
501 515
502 error = gfs2_unlink_ok(dip, &dentry->d_name, ip); 516 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
503 if (error) 517 if (error)
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
523 gfs2_trans_end(sdp); 537 gfs2_trans_end(sdp);
524 538
525out_gunlock: 539out_gunlock:
526 gfs2_glock_dq_m(3, ghs); 540 gfs2_glock_dq(ghs + 2);
527out: 541out_rgrp:
528 gfs2_holder_uninit(ghs);
529 gfs2_holder_uninit(ghs + 1);
530 gfs2_holder_uninit(ghs + 2); 542 gfs2_holder_uninit(ghs + 2);
543 gfs2_glock_dq(ghs + 1);
544out_child:
545 gfs2_holder_uninit(ghs + 1);
546 gfs2_glock_dq(ghs);
547out_parent:
548 gfs2_holder_uninit(ghs);
531 gfs2_glock_dq_uninit(&ri_gh); 549 gfs2_glock_dq_uninit(&ri_gh);
532 return error; 550 return error;
533} 551}
@@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
571 return 0; 589 return 0;
572} 590}
573 591
592/*
593 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
594 * @this: move this
595 * @to: to here
596 *
597 * Follow @to back to the root and make sure we don't encounter @this
598 * Assumes we already hold the rename lock.
599 *
600 * Returns: errno
601 */
602
603static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
604{
605 struct inode *dir = &to->i_inode;
606 struct super_block *sb = dir->i_sb;
607 struct inode *tmp;
608 struct qstr dotdot;
609 int error = 0;
610
611 gfs2_str2qstr(&dotdot, "..");
612
613 igrab(dir);
614
615 for (;;) {
616 if (dir == &this->i_inode) {
617 error = -EINVAL;
618 break;
619 }
620 if (dir == sb->s_root->d_inode) {
621 error = 0;
622 break;
623 }
624
625 tmp = gfs2_lookupi(dir, &dotdot, 1);
626 if (IS_ERR(tmp)) {
627 error = PTR_ERR(tmp);
628 break;
629 }
630
631 iput(dir);
632 dir = tmp;
633 }
634
635 iput(dir);
636
637 return error;
638}
639
574/** 640/**
575 * gfs2_rename - Rename a file 641 * gfs2_rename - Rename a file
576 * @odir: Parent directory of old file name 642 * @odir: Parent directory of old file name
@@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
589 struct gfs2_inode *ip = GFS2_I(odentry->d_inode); 655 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
590 struct gfs2_inode *nip = NULL; 656 struct gfs2_inode *nip = NULL;
591 struct gfs2_sbd *sdp = GFS2_SB(odir); 657 struct gfs2_sbd *sdp = GFS2_SB(odir);
592 struct gfs2_holder ghs[5], r_gh; 658 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
593 struct gfs2_rgrpd *nrgd; 659 struct gfs2_rgrpd *nrgd;
594 unsigned int num_gh; 660 unsigned int num_gh;
595 int dir_rename = 0; 661 int dir_rename = 0;
@@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
603 return 0; 669 return 0;
604 } 670 }
605 671
606 /* Make sure we aren't trying to move a dirctory into it's subdir */
607
608 if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) {
609 dir_rename = 1;
610 672
611 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, 673 if (odip != ndip) {
612 &r_gh); 674 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
675 0, &r_gh);
613 if (error) 676 if (error)
614 goto out; 677 goto out;
615 678
616 error = gfs2_ok_to_move(ip, ndip); 679 if (S_ISDIR(ip->i_inode.i_mode)) {
617 if (error) 680 dir_rename = 1;
618 goto out_gunlock_r; 681 /* don't move a dirctory into it's subdir */
682 error = gfs2_ok_to_move(ip, ndip);
683 if (error)
684 goto out_gunlock_r;
685 }
619 } 686 }
620 687
621 num_gh = 1; 688 num_gh = 1;
@@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
639 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 706 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
640 } 707 }
641 708
642 error = gfs2_glock_nq_m(num_gh, ghs); 709 for (x = 0; x < num_gh; x++) {
643 if (error) 710 error = gfs2_glock_nq(ghs + x);
644 goto out_uninit; 711 if (error)
712 goto out_gunlock;
713 }
645 714
646 /* Check out the old directory */ 715 /* Check out the old directory */
647 716
@@ -804,12 +873,12 @@ out_alloc:
804 if (alloc_required) 873 if (alloc_required)
805 gfs2_alloc_put(ndip); 874 gfs2_alloc_put(ndip);
806out_gunlock: 875out_gunlock:
807 gfs2_glock_dq_m(num_gh, ghs); 876 while (x--) {
808out_uninit: 877 gfs2_glock_dq(ghs + x);
809 for (x = 0; x < num_gh; x++)
810 gfs2_holder_uninit(ghs + x); 878 gfs2_holder_uninit(ghs + x);
879 }
811out_gunlock_r: 880out_gunlock_r:
812 if (dir_rename) 881 if (r_gh.gh_gl)
813 gfs2_glock_dq_uninit(&r_gh); 882 gfs2_glock_dq_uninit(&r_gh);
814out: 883out:
815 return error; 884 return error;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index f66ea0f7a356..d5355d9b5926 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -20,6 +20,7 @@
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h> 21#include <linux/crc32.h>
22#include <linux/lm_interface.h> 22#include <linux/lm_interface.h>
23#include <linux/time.h>
23 24
24#include "gfs2.h" 25#include "gfs2.h"
25#include "incore.h" 26#include "incore.h"
@@ -38,6 +39,7 @@
38#include "dir.h" 39#include "dir.h"
39#include "eattr.h" 40#include "eattr.h"
40#include "bmap.h" 41#include "bmap.h"
42#include "meta_io.h"
41 43
42/** 44/**
43 * gfs2_write_inode - Make sure the inode is stable on the disk 45 * gfs2_write_inode - Make sure the inode is stable on the disk
@@ -50,16 +52,74 @@
50static int gfs2_write_inode(struct inode *inode, int sync) 52static int gfs2_write_inode(struct inode *inode, int sync)
51{ 53{
52 struct gfs2_inode *ip = GFS2_I(inode); 54 struct gfs2_inode *ip = GFS2_I(inode);
53 55 struct gfs2_sbd *sdp = GFS2_SB(inode);
54 /* Check this is a "normal" inode */ 56 struct gfs2_holder gh;
55 if (test_bit(GIF_USER, &ip->i_flags)) { 57 struct buffer_head *bh;
56 if (current->flags & PF_MEMALLOC) 58 struct timespec atime;
57 return 0; 59 struct gfs2_dinode *di;
58 if (sync) 60 int ret = 0;
59 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 61
62 /* Check this is a "normal" inode, etc */
63 if (!test_bit(GIF_USER, &ip->i_flags) ||
64 (current->flags & PF_MEMALLOC))
65 return 0;
66 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
67 if (ret)
68 goto do_flush;
69 ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
70 if (ret)
71 goto do_unlock;
72 ret = gfs2_meta_inode_buffer(ip, &bh);
73 if (ret == 0) {
74 di = (struct gfs2_dinode *)bh->b_data;
75 atime.tv_sec = be64_to_cpu(di->di_atime);
76 atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
77 if (timespec_compare(&inode->i_atime, &atime) > 0) {
78 gfs2_trans_add_bh(ip->i_gl, bh, 1);
79 gfs2_dinode_out(ip, bh->b_data);
80 }
81 brelse(bh);
60 } 82 }
83 gfs2_trans_end(sdp);
84do_unlock:
85 gfs2_glock_dq_uninit(&gh);
86do_flush:
87 if (sync != 0)
88 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
89 return ret;
90}
61 91
62 return 0; 92/**
93 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
94 * @sdp: the filesystem
95 *
96 * Returns: errno
97 */
98
99static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
100{
101 struct gfs2_holder t_gh;
102 int error;
103
104 gfs2_quota_sync(sdp);
105 gfs2_statfs_sync(sdp);
106
107 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
108 &t_gh);
109 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
110 return error;
111
112 gfs2_meta_syncfs(sdp);
113 gfs2_log_shutdown(sdp);
114
115 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
116
117 if (t_gh.gh_gl)
118 gfs2_glock_dq_uninit(&t_gh);
119
120 gfs2_quota_cleanup(sdp);
121
122 return error;
63} 123}
64 124
65/** 125/**
@@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb)
73 struct gfs2_sbd *sdp = sb->s_fs_info; 133 struct gfs2_sbd *sdp = sb->s_fs_info;
74 int error; 134 int error;
75 135
76 if (!sdp)
77 return;
78
79 if (!strncmp(sb->s_type->name, "gfs2meta", 8))
80 return; /* Nothing to do */
81
82 /* Unfreeze the filesystem, if we need to */ 136 /* Unfreeze the filesystem, if we need to */
83 137
84 mutex_lock(&sdp->sd_freeze_lock); 138 mutex_lock(&sdp->sd_freeze_lock);
@@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb)
101 155
102 /* Release stuff */ 156 /* Release stuff */
103 157
104 iput(sdp->sd_master_dir);
105 iput(sdp->sd_jindex); 158 iput(sdp->sd_jindex);
106 iput(sdp->sd_inum_inode); 159 iput(sdp->sd_inum_inode);
107 iput(sdp->sd_statfs_inode); 160 iput(sdp->sd_statfs_inode);
@@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb)
152 * 205 *
153 * Flushes the log to disk. 206 * Flushes the log to disk.
154 */ 207 */
208
155static int gfs2_sync_fs(struct super_block *sb, int wait) 209static int gfs2_sync_fs(struct super_block *sb, int wait)
156{ 210{
157 sb->s_dirt = 0; 211 sb->s_dirt = 0;
@@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
270 } 324 }
271 } 325 }
272 326
273 if (*flags & (MS_NOATIME | MS_NODIRATIME))
274 set_bit(SDF_NOATIME, &sdp->sd_flags);
275 else
276 clear_bit(SDF_NOATIME, &sdp->sd_flags);
277
278 /* Don't let the VFS update atimes. GFS2 handles this itself. */
279 *flags |= MS_NOATIME | MS_NODIRATIME;
280
281 return error; 327 return error;
282} 328}
283 329
@@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
295 * inode's blocks, or alternatively pass the baton on to another 341 * inode's blocks, or alternatively pass the baton on to another
296 * node for later deallocation. 342 * node for later deallocation.
297 */ 343 */
344
298static void gfs2_drop_inode(struct inode *inode) 345static void gfs2_drop_inode(struct inode *inode)
299{ 346{
300 struct gfs2_inode *ip = GFS2_I(inode); 347 struct gfs2_inode *ip = GFS2_I(inode);
@@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode)
333 } 380 }
334} 381}
335 382
383static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
384{
385 do {
386 if (d1 == d2)
387 return 1;
388 d1 = d1->d_parent;
389 } while (!IS_ROOT(d1));
390 return 0;
391}
392
336/** 393/**
337 * gfs2_show_options - Show mount options for /proc/mounts 394 * gfs2_show_options - Show mount options for /proc/mounts
338 * @s: seq_file structure 395 * @s: seq_file structure
@@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
346 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; 403 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
347 struct gfs2_args *args = &sdp->sd_args; 404 struct gfs2_args *args = &sdp->sd_args;
348 405
406 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
407 seq_printf(s, ",meta");
349 if (args->ar_lockproto[0]) 408 if (args->ar_lockproto[0])
350 seq_printf(s, ",lockproto=%s", args->ar_lockproto); 409 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
351 if (args->ar_locktable[0]) 410 if (args->ar_locktable[0])
@@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
414 * conversion on the iopen lock, but we can change that later. This 473 * conversion on the iopen lock, but we can change that later. This
415 * is safe, just less efficient. 474 * is safe, just less efficient.
416 */ 475 */
476
417static void gfs2_delete_inode(struct inode *inode) 477static void gfs2_delete_inode(struct inode *inode)
418{ 478{
419 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 479 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
@@ -478,8 +538,6 @@ out:
478 clear_inode(inode); 538 clear_inode(inode);
479} 539}
480 540
481
482
483static struct inode *gfs2_alloc_inode(struct super_block *sb) 541static struct inode *gfs2_alloc_inode(struct super_block *sb)
484{ 542{
485 struct gfs2_inode *ip; 543 struct gfs2_inode *ip;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ca831991cbc2..c3ba3d9d0aac 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -33,313 +33,6 @@
33#include "trans.h" 33#include "trans.h"
34#include "util.h" 34#include "util.h"
35 35
36static const u32 gfs2_old_fs_formats[] = {
37 0
38};
39
40static const u32 gfs2_old_multihost_formats[] = {
41 0
42};
43
44/**
45 * gfs2_tune_init - Fill a gfs2_tune structure with default values
46 * @gt: tune
47 *
48 */
49
50void gfs2_tune_init(struct gfs2_tune *gt)
51{
52 spin_lock_init(&gt->gt_spin);
53
54 gt->gt_demote_secs = 300;
55 gt->gt_incore_log_blocks = 1024;
56 gt->gt_log_flush_secs = 60;
57 gt->gt_recoverd_secs = 60;
58 gt->gt_logd_secs = 1;
59 gt->gt_quotad_secs = 5;
60 gt->gt_quota_simul_sync = 64;
61 gt->gt_quota_warn_period = 10;
62 gt->gt_quota_scale_num = 1;
63 gt->gt_quota_scale_den = 1;
64 gt->gt_quota_cache_secs = 300;
65 gt->gt_quota_quantum = 60;
66 gt->gt_atime_quantum = 3600;
67 gt->gt_new_files_jdata = 0;
68 gt->gt_max_readahead = 1 << 18;
69 gt->gt_stall_secs = 600;
70 gt->gt_complain_secs = 10;
71 gt->gt_statfs_quantum = 30;
72 gt->gt_statfs_slow = 0;
73}
74
75/**
76 * gfs2_check_sb - Check superblock
77 * @sdp: the filesystem
78 * @sb: The superblock
79 * @silent: Don't print a message if the check fails
80 *
81 * Checks the version code of the FS is one that we understand how to
82 * read and that the sizes of the various on-disk structures have not
83 * changed.
84 */
85
86int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
87{
88 unsigned int x;
89
90 if (sb->sb_magic != GFS2_MAGIC ||
91 sb->sb_type != GFS2_METATYPE_SB) {
92 if (!silent)
93 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
94 return -EINVAL;
95 }
96
97 /* If format numbers match exactly, we're done. */
98
99 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
100 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
101 return 0;
102
103 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
104 for (x = 0; gfs2_old_fs_formats[x]; x++)
105 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
106 break;
107
108 if (!gfs2_old_fs_formats[x]) {
109 printk(KERN_WARNING
110 "GFS2: code version (%u, %u) is incompatible "
111 "with ondisk format (%u, %u)\n",
112 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
113 sb->sb_fs_format, sb->sb_multihost_format);
114 printk(KERN_WARNING
115 "GFS2: I don't know how to upgrade this FS\n");
116 return -EINVAL;
117 }
118 }
119
120 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
121 for (x = 0; gfs2_old_multihost_formats[x]; x++)
122 if (gfs2_old_multihost_formats[x] ==
123 sb->sb_multihost_format)
124 break;
125
126 if (!gfs2_old_multihost_formats[x]) {
127 printk(KERN_WARNING
128 "GFS2: code version (%u, %u) is incompatible "
129 "with ondisk format (%u, %u)\n",
130 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
131 sb->sb_fs_format, sb->sb_multihost_format);
132 printk(KERN_WARNING
133 "GFS2: I don't know how to upgrade this FS\n");
134 return -EINVAL;
135 }
136 }
137
138 if (!sdp->sd_args.ar_upgrade) {
139 printk(KERN_WARNING
140 "GFS2: code version (%u, %u) is incompatible "
141 "with ondisk format (%u, %u)\n",
142 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
143 sb->sb_fs_format, sb->sb_multihost_format);
144 printk(KERN_INFO
145 "GFS2: Use the \"upgrade\" mount option to upgrade "
146 "the FS\n");
147 printk(KERN_INFO "GFS2: See the manual for more details\n");
148 return -EINVAL;
149 }
150
151 return 0;
152}
153
154
155static void end_bio_io_page(struct bio *bio, int error)
156{
157 struct page *page = bio->bi_private;
158
159 if (!error)
160 SetPageUptodate(page);
161 else
162 printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
163 unlock_page(page);
164}
165
166static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
167{
168 const struct gfs2_sb *str = buf;
169
170 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
171 sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
172 sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
173 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
174 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
175 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
176 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
177 sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
178 sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
179 sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
180 sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
181
182 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
183 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
184}
185
186/**
187 * gfs2_read_super - Read the gfs2 super block from disk
188 * @sdp: The GFS2 super block
189 * @sector: The location of the super block
190 * @error: The error code to return
191 *
192 * This uses the bio functions to read the super block from disk
193 * because we want to be 100% sure that we never read cached data.
194 * A super block is read twice only during each GFS2 mount and is
195 * never written to by the filesystem. The first time its read no
196 * locks are held, and the only details which are looked at are those
197 * relating to the locking protocol. Once locking is up and working,
198 * the sb is read again under the lock to establish the location of
199 * the master directory (contains pointers to journals etc) and the
200 * root directory.
201 *
202 * Returns: 0 on success or error
203 */
204
205int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
206{
207 struct super_block *sb = sdp->sd_vfs;
208 struct gfs2_sb *p;
209 struct page *page;
210 struct bio *bio;
211
212 page = alloc_page(GFP_NOFS);
213 if (unlikely(!page))
214 return -ENOBUFS;
215
216 ClearPageUptodate(page);
217 ClearPageDirty(page);
218 lock_page(page);
219
220 bio = bio_alloc(GFP_NOFS, 1);
221 if (unlikely(!bio)) {
222 __free_page(page);
223 return -ENOBUFS;
224 }
225
226 bio->bi_sector = sector * (sb->s_blocksize >> 9);
227 bio->bi_bdev = sb->s_bdev;
228 bio_add_page(bio, page, PAGE_SIZE, 0);
229
230 bio->bi_end_io = end_bio_io_page;
231 bio->bi_private = page;
232 submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
233 wait_on_page_locked(page);
234 bio_put(bio);
235 if (!PageUptodate(page)) {
236 __free_page(page);
237 return -EIO;
238 }
239 p = kmap(page);
240 gfs2_sb_in(&sdp->sd_sb, p);
241 kunmap(page);
242 __free_page(page);
243 return 0;
244}
245
246/**
247 * gfs2_read_sb - Read super block
248 * @sdp: The GFS2 superblock
249 * @gl: the glock for the superblock (assumed to be held)
250 * @silent: Don't print message if mount fails
251 *
252 */
253
254int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
255{
256 u32 hash_blocks, ind_blocks, leaf_blocks;
257 u32 tmp_blocks;
258 unsigned int x;
259 int error;
260
261 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
262 if (error) {
263 if (!silent)
264 fs_err(sdp, "can't read superblock\n");
265 return error;
266 }
267
268 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
269 if (error)
270 return error;
271
272 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
273 GFS2_BASIC_BLOCK_SHIFT;
274 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
275 sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
276 sizeof(struct gfs2_dinode)) / sizeof(u64);
277 sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
278 sizeof(struct gfs2_meta_header)) / sizeof(u64);
279 sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
280 sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
281 sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
282 sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
283 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
284 sizeof(struct gfs2_meta_header)) /
285 sizeof(struct gfs2_quota_change);
286
287 /* Compute maximum reservation required to add a entry to a directory */
288
289 hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
290 sdp->sd_jbsize);
291
292 ind_blocks = 0;
293 for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
294 tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
295 ind_blocks += tmp_blocks;
296 }
297
298 leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
299
300 sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
301
302 sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
303 sizeof(struct gfs2_dinode);
304 sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
305 for (x = 2;; x++) {
306 u64 space, d;
307 u32 m;
308
309 space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
310 d = space;
311 m = do_div(d, sdp->sd_inptrs);
312
313 if (d != sdp->sd_heightsize[x - 1] || m)
314 break;
315 sdp->sd_heightsize[x] = space;
316 }
317 sdp->sd_max_height = x;
318 sdp->sd_heightsize[x] = ~0;
319 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
320
321 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
322 sizeof(struct gfs2_dinode);
323 sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
324 for (x = 2;; x++) {
325 u64 space, d;
326 u32 m;
327
328 space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
329 d = space;
330 m = do_div(d, sdp->sd_inptrs);
331
332 if (d != sdp->sd_jheightsize[x - 1] || m)
333 break;
334 sdp->sd_jheightsize[x] = space;
335 }
336 sdp->sd_max_jheight = x;
337 sdp->sd_jheightsize[x] = ~0;
338 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
339
340 return 0;
341}
342
343/** 36/**
344 * gfs2_jindex_hold - Grab a lock on the jindex 37 * gfs2_jindex_hold - Grab a lock on the jindex
345 * @sdp: The GFS2 superblock 38 * @sdp: The GFS2 superblock
@@ -581,39 +274,6 @@ fail:
581 return error; 274 return error;
582} 275}
583 276
584/**
585 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
586 * @sdp: the filesystem
587 *
588 * Returns: errno
589 */
590
591int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
592{
593 struct gfs2_holder t_gh;
594 int error;
595
596 gfs2_quota_sync(sdp);
597 gfs2_statfs_sync(sdp);
598
599 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
600 &t_gh);
601 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
602 return error;
603
604 gfs2_meta_syncfs(sdp);
605 gfs2_log_shutdown(sdp);
606
607 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
608
609 if (t_gh.gh_gl)
610 gfs2_glock_dq_uninit(&t_gh);
611
612 gfs2_quota_cleanup(sdp);
613
614 return error;
615}
616
617static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) 277static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
618{ 278{
619 const struct gfs2_statfs_change *str = buf; 279 const struct gfs2_statfs_change *str = buf;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 44361ecc44f7..50a4c9b1215e 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -12,11 +12,6 @@
12 12
13#include "incore.h" 13#include "incore.h"
14 14
15void gfs2_tune_init(struct gfs2_tune *gt);
16
17int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
18int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
19int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
20void gfs2_lm_unmount(struct gfs2_sbd *sdp); 15void gfs2_lm_unmount(struct gfs2_sbd *sdp);
21 16
22static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) 17static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
@@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
40 struct gfs2_inode **ipp); 35 struct gfs2_inode **ipp);
41 36
42int gfs2_make_fs_rw(struct gfs2_sbd *sdp); 37int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
43int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
44 38
45int gfs2_statfs_init(struct gfs2_sbd *sdp); 39int gfs2_statfs_init(struct gfs2_sbd *sdp);
46void gfs2_statfs_change(struct gfs2_sbd *sdp, 40void gfs2_statfs_change(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 74846559fc3f..7e1879f1a02c 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n");
269ARGS_ATTR(suiddir, "%d\n"); 269ARGS_ATTR(suiddir, "%d\n");
270ARGS_ATTR(data, "%d\n"); 270ARGS_ATTR(data, "%d\n");
271 271
272/* one oddball doesn't fit the macro mold */
273static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
274{
275 return snprintf(buf, PAGE_SIZE, "%d\n",
276 !!test_bit(SDF_NOATIME, &sdp->sd_flags));
277}
278static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
279
280static struct attribute *args_attrs[] = { 272static struct attribute *args_attrs[] = {
281 &args_attr_lockproto.attr, 273 &args_attr_lockproto.attr,
282 &args_attr_locktable.attr, 274 &args_attr_locktable.attr,
@@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = {
292 &args_attr_quota.attr, 284 &args_attr_quota.attr,
293 &args_attr_suiddir.attr, 285 &args_attr_suiddir.attr,
294 &args_attr_data.attr, 286 &args_attr_data.attr,
295 &args_attr_noatime.attr,
296 NULL, 287 NULL,
297}; 288};
298 289
@@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0);
407TUNE_ATTR(log_flush_secs, 0); 398TUNE_ATTR(log_flush_secs, 0);
408TUNE_ATTR(quota_warn_period, 0); 399TUNE_ATTR(quota_warn_period, 0);
409TUNE_ATTR(quota_quantum, 0); 400TUNE_ATTR(quota_quantum, 0);
410TUNE_ATTR(atime_quantum, 0);
411TUNE_ATTR(max_readahead, 0); 401TUNE_ATTR(max_readahead, 0);
412TUNE_ATTR(complain_secs, 0); 402TUNE_ATTR(complain_secs, 0);
413TUNE_ATTR(statfs_slow, 0); 403TUNE_ATTR(statfs_slow, 0);
@@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = {
427 &tune_attr_log_flush_secs.attr, 417 &tune_attr_log_flush_secs.attr,
428 &tune_attr_quota_warn_period.attr, 418 &tune_attr_quota_warn_period.attr,
429 &tune_attr_quota_quantum.attr, 419 &tune_attr_quota_quantum.attr,
430 &tune_attr_atime_quantum.attr,
431 &tune_attr_max_readahead.attr, 420 &tune_attr_max_readahead.attr,
432 &tune_attr_complain_secs.attr, 421 &tune_attr_complain_secs.attr,
433 &tune_attr_statfs_slow.attr, 422 &tune_attr_statfs_slow.attr,
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 60249429a253..d85c7d931cdf 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -323,7 +323,7 @@ out:
323} 323}
324 324
325/* 325/*
326 * remove_kevent - cleans up and ultimately frees the given kevent 326 * remove_kevent - cleans up the given kevent
327 * 327 *
328 * Caller must hold dev->ev_mutex. 328 * Caller must hold dev->ev_mutex.
329 */ 329 */
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev,
334 334
335 dev->event_count--; 335 dev->event_count--;
336 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; 336 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
337}
337 338
339/*
340 * free_kevent - frees the given kevent.
341 */
342static void free_kevent(struct inotify_kernel_event *kevent)
343{
338 kfree(kevent->name); 344 kfree(kevent->name);
339 kmem_cache_free(event_cachep, kevent); 345 kmem_cache_free(event_cachep, kevent);
340} 346}
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
350 struct inotify_kernel_event *kevent; 356 struct inotify_kernel_event *kevent;
351 kevent = inotify_dev_get_event(dev); 357 kevent = inotify_dev_get_event(dev);
352 remove_kevent(dev, kevent); 358 remove_kevent(dev, kevent);
359 free_kevent(kevent);
353 } 360 }
354} 361}
355 362
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
433 dev = file->private_data; 440 dev = file->private_data;
434 441
435 while (1) { 442 while (1) {
436 int events;
437 443
438 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); 444 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
439 445
440 mutex_lock(&dev->ev_mutex); 446 mutex_lock(&dev->ev_mutex);
441 events = !list_empty(&dev->events); 447 if (!list_empty(&dev->events)) {
442 mutex_unlock(&dev->ev_mutex);
443 if (events) {
444 ret = 0; 448 ret = 0;
445 break; 449 break;
446 } 450 }
451 mutex_unlock(&dev->ev_mutex);
447 452
448 if (file->f_flags & O_NONBLOCK) { 453 if (file->f_flags & O_NONBLOCK) {
449 ret = -EAGAIN; 454 ret = -EAGAIN;
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
462 if (ret) 467 if (ret)
463 return ret; 468 return ret;
464 469
465 mutex_lock(&dev->ev_mutex);
466 while (1) { 470 while (1) {
467 struct inotify_kernel_event *kevent; 471 struct inotify_kernel_event *kevent;
468 472
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
481 } 485 }
482 break; 486 break;
483 } 487 }
488 remove_kevent(dev, kevent);
489
490 /*
491 * Must perform the copy_to_user outside the mutex in order
492 * to avoid a lock order reversal with mmap_sem.
493 */
494 mutex_unlock(&dev->ev_mutex);
484 495
485 if (copy_to_user(buf, &kevent->event, event_size)) { 496 if (copy_to_user(buf, &kevent->event, event_size)) {
486 ret = -EFAULT; 497 ret = -EFAULT;
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
498 count -= kevent->event.len; 509 count -= kevent->event.len;
499 } 510 }
500 511
501 remove_kevent(dev, kevent); 512 free_kevent(kevent);
513
514 mutex_lock(&dev->ev_mutex);
502 } 515 }
503 mutex_unlock(&dev->ev_mutex); 516 mutex_unlock(&dev->ev_mutex);
504 517
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9abcd2b329f7..e9b20173fef3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw,
1279 } 1279 }
1280 } 1280 }
1281 1281
1282 if (errors > 0) {
1283 dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n",
1284 errors, (errors == 1 ? "" : "s"));
1285 if (!sloppy)
1286 return 0;
1287 }
1282 return 1; 1288 return 1;
1283 1289
1284out_nomem: 1290out_nomem:
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index b6ed38380ab8..54b8b4140c8f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt)
443 * enough space for either: 443 * enough space for either:
444 */ 444 */
445 alloc = sizeof(struct posix_ace_state_array) 445 alloc = sizeof(struct posix_ace_state_array)
446 + cnt*sizeof(struct posix_ace_state); 446 + cnt*sizeof(struct posix_user_ace_state);
447 state->users = kzalloc(alloc, GFP_KERNEL); 447 state->users = kzalloc(alloc, GFP_KERNEL);
448 if (!state->users) 448 if (!state->users)
449 return -ENOMEM; 449 return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2e51adac65de..e5b51ffafc6c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
867 int slack_bytes; 867 int slack_bytes;
868 __be32 status; 868 __be32 status;
869 869
870 status = nfserr_resource;
871 cstate = cstate_alloc();
872 if (cstate == NULL)
873 goto out;
874
875 resp->xbuf = &rqstp->rq_res; 870 resp->xbuf = &rqstp->rq_res;
876 resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; 871 resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
877 resp->tagp = resp->p; 872 resp->tagp = resp->p;
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
890 if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) 885 if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
891 goto out; 886 goto out;
892 887
888 status = nfserr_resource;
889 cstate = cstate_alloc();
890 if (cstate == NULL)
891 goto out;
892
893 status = nfs_ok; 893 status = nfs_ok;
894 while (!status && resp->opcnt < args->opcnt) { 894 while (!status && resp->opcnt < args->opcnt) {
895 op = &args->ops[resp->opcnt++]; 895 op = &args->ops[resp->opcnt++];
@@ -957,9 +957,9 @@ encode_op:
957 nfsd4_increment_op_stats(op->opnum); 957 nfsd4_increment_op_stats(op->opnum);
958 } 958 }
959 959
960 cstate_free(cstate);
960out: 961out:
961 nfsd4_release_compoundargs(args); 962 nfsd4_release_compoundargs(args);
962 cstate_free(cstate);
963 dprintk("nfsv4 compound returned %d\n", ntohl(status)); 963 dprintk("nfsv4 compound returned %d\n", ntohl(status));
964 return status; 964 return status;
965} 965}
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 3a8af75351e8..4087fbdac327 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -113,7 +113,7 @@ typedef struct {
113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the 113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the
114 * file since it was last opened. I think the names speak for themselves but 114 * file since it was last opened. I think the names speak for themselves but
115 * if you disagree check out the descriptions in the Linux NTFS project NTFS 115 * if you disagree check out the descriptions in the Linux NTFS project NTFS
116 * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html 116 * documentation: http://www.linux-ntfs.org/
117 */ 117 */
118enum { 118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), 119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001),
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS;
145 * Source info flags (32-bit). Information about the source of the change(s) 145 * Source info flags (32-bit). Information about the source of the change(s)
146 * to the file. For detailed descriptions of what these mean, see the Linux 146 * to the file. For detailed descriptions of what these mean, see the Linux
147 * NTFS project NTFS documentation: 147 * NTFS project NTFS documentation:
148 * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html 148 * http://www.linux-ntfs.org/
149 */ 149 */
150enum { 150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), 151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001),
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 506c24fb5078..a53da1466277 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -594,7 +594,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
594 goto bail; 594 goto bail;
595 } 595 }
596 596
597 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { 597 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
598 ocfs2_error(inode->i_sb, 598 ocfs2_error(inode->i_sb,
599 "Inode %llu has a hole at block %llu\n", 599 "Inode %llu has a hole at block %llu\n",
600 (unsigned long long)OCFS2_I(inode)->ip_blkno, 600 (unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7d6b34e201db..7408227c49c9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
120 * a pointer to that same buffer (for convenience). 120 * a pointer to that same buffer (for convenience).
121 */ 121 */
122 122
123char *disk_name(struct gendisk *hd, int part, char *buf) 123char *disk_name(struct gendisk *hd, int partno, char *buf)
124{ 124{
125 if (!part) 125 if (!partno)
126 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 126 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
127 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 127 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
128 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); 128 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
129 else 129 else
130 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); 130 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
131 131
132 return buf; 132 return buf;
133} 133}
134 134
135const char *bdevname(struct block_device *bdev, char *buf) 135const char *bdevname(struct block_device *bdev, char *buf)
136{ 136{
137 int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; 137 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
138 return disk_name(bdev->bd_disk, part, buf);
139} 138}
140 139
141EXPORT_SYMBOL(bdevname); 140EXPORT_SYMBOL(bdevname);
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
169 if (isdigit(state->name[strlen(state->name)-1])) 168 if (isdigit(state->name[strlen(state->name)-1]))
170 sprintf(state->name, "p"); 169 sprintf(state->name, "p");
171 170
172 state->limit = hd->minors; 171 state->limit = disk_max_parts(hd);
173 i = res = err = 0; 172 i = res = err = 0;
174 while (!res && check_part[i]) { 173 while (!res && check_part[i]) {
175 memset(&state->parts, 0, sizeof(state->parts)); 174 memset(&state->parts, 0, sizeof(state->parts));
@@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev,
204 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 203 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
205} 204}
206 205
207static ssize_t part_size_show(struct device *dev, 206ssize_t part_size_show(struct device *dev,
208 struct device_attribute *attr, char *buf) 207 struct device_attribute *attr, char *buf)
209{ 208{
210 struct hd_struct *p = dev_to_part(dev); 209 struct hd_struct *p = dev_to_part(dev);
211 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 210 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
212} 211}
213 212
214static ssize_t part_stat_show(struct device *dev, 213ssize_t part_stat_show(struct device *dev,
215 struct device_attribute *attr, char *buf) 214 struct device_attribute *attr, char *buf)
216{ 215{
217 struct hd_struct *p = dev_to_part(dev); 216 struct hd_struct *p = dev_to_part(dev);
217 int cpu;
218 218
219 preempt_disable(); 219 cpu = part_stat_lock();
220 part_round_stats(p); 220 part_round_stats(cpu, p);
221 preempt_enable(); 221 part_stat_unlock();
222 return sprintf(buf, 222 return sprintf(buf,
223 "%8lu %8lu %8llu %8u " 223 "%8lu %8lu %8llu %8u "
224 "%8lu %8lu %8llu %8u " 224 "%8lu %8lu %8llu %8u "
@@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev,
238} 238}
239 239
240#ifdef CONFIG_FAIL_MAKE_REQUEST 240#ifdef CONFIG_FAIL_MAKE_REQUEST
241static ssize_t part_fail_show(struct device *dev, 241ssize_t part_fail_show(struct device *dev,
242 struct device_attribute *attr, char *buf) 242 struct device_attribute *attr, char *buf)
243{ 243{
244 struct hd_struct *p = dev_to_part(dev); 244 struct hd_struct *p = dev_to_part(dev);
245 245
246 return sprintf(buf, "%d\n", p->make_it_fail); 246 return sprintf(buf, "%d\n", p->make_it_fail);
247} 247}
248 248
249static ssize_t part_fail_store(struct device *dev, 249ssize_t part_fail_store(struct device *dev,
250 struct device_attribute *attr, 250 struct device_attribute *attr,
251 const char *buf, size_t count) 251 const char *buf, size_t count)
252{ 252{
253 struct hd_struct *p = dev_to_part(dev); 253 struct hd_struct *p = dev_to_part(dev);
254 int i; 254 int i;
@@ -300,40 +300,34 @@ struct device_type part_type = {
300 .release = part_release, 300 .release = part_release,
301}; 301};
302 302
303static inline void partition_sysfs_add_subdir(struct hd_struct *p) 303static void delete_partition_rcu_cb(struct rcu_head *head)
304{
305 struct kobject *k;
306
307 k = kobject_get(&p->dev.kobj);
308 p->holder_dir = kobject_create_and_add("holders", k);
309 kobject_put(k);
310}
311
312static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
313{ 304{
314 struct kobject *k; 305 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
315 306
316 k = kobject_get(&disk->dev.kobj); 307 part->start_sect = 0;
317 disk->holder_dir = kobject_create_and_add("holders", k); 308 part->nr_sects = 0;
318 disk->slave_dir = kobject_create_and_add("slaves", k); 309 part_stat_set_all(part, 0);
319 kobject_put(k); 310 put_device(part_to_dev(part));
320} 311}
321 312
322void delete_partition(struct gendisk *disk, int part) 313void delete_partition(struct gendisk *disk, int partno)
323{ 314{
324 struct hd_struct *p = disk->part[part-1]; 315 struct disk_part_tbl *ptbl = disk->part_tbl;
316 struct hd_struct *part;
325 317
326 if (!p) 318 if (partno >= ptbl->len)
327 return; 319 return;
328 if (!p->nr_sects) 320
321 part = ptbl->part[partno];
322 if (!part)
329 return; 323 return;
330 disk->part[part-1] = NULL; 324
331 p->start_sect = 0; 325 blk_free_devt(part_devt(part));
332 p->nr_sects = 0; 326 rcu_assign_pointer(ptbl->part[partno], NULL);
333 part_stat_set_all(p, 0); 327 kobject_put(part->holder_dir);
334 kobject_put(p->holder_dir); 328 device_del(part_to_dev(part));
335 device_del(&p->dev); 329
336 put_device(&p->dev); 330 call_rcu(&part->rcu_head, delete_partition_rcu_cb);
337} 331}
338 332
339static ssize_t whole_disk_show(struct device *dev, 333static ssize_t whole_disk_show(struct device *dev,
@@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev,
344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 338static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
345 whole_disk_show, NULL); 339 whole_disk_show, NULL);
346 340
347int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 341int add_partition(struct gendisk *disk, int partno,
342 sector_t start, sector_t len, int flags)
348{ 343{
349 struct hd_struct *p; 344 struct hd_struct *p;
345 dev_t devt = MKDEV(0, 0);
346 struct device *ddev = disk_to_dev(disk);
347 struct device *pdev;
348 struct disk_part_tbl *ptbl;
349 const char *dname;
350 int err; 350 int err;
351 351
352 err = disk_expand_part_tbl(disk, partno);
353 if (err)
354 return err;
355 ptbl = disk->part_tbl;
356
357 if (ptbl->part[partno])
358 return -EBUSY;
359
352 p = kzalloc(sizeof(*p), GFP_KERNEL); 360 p = kzalloc(sizeof(*p), GFP_KERNEL);
353 if (!p) 361 if (!p)
354 return -ENOMEM; 362 return -ENOMEM;
355 363
356 if (!init_part_stats(p)) { 364 if (!init_part_stats(p)) {
357 err = -ENOMEM; 365 err = -ENOMEM;
358 goto out0; 366 goto out_free;
359 } 367 }
368 pdev = part_to_dev(p);
369
360 p->start_sect = start; 370 p->start_sect = start;
361 p->nr_sects = len; 371 p->nr_sects = len;
362 p->partno = part; 372 p->partno = partno;
363 p->policy = disk->policy; 373 p->policy = get_disk_ro(disk);
364 374
365 if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) 375 dname = dev_name(ddev);
366 snprintf(p->dev.bus_id, BUS_ID_SIZE, 376 if (isdigit(dname[strlen(dname) - 1]))
367 "%sp%d", disk->dev.bus_id, part); 377 snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno);
368 else 378 else
369 snprintf(p->dev.bus_id, BUS_ID_SIZE, 379 snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno);
370 "%s%d", disk->dev.bus_id, part); 380
381 device_initialize(pdev);
382 pdev->class = &block_class;
383 pdev->type = &part_type;
384 pdev->parent = ddev;
371 385
372 device_initialize(&p->dev); 386 err = blk_alloc_devt(p, &devt);
373 p->dev.devt = MKDEV(disk->major, disk->first_minor + part); 387 if (err)
374 p->dev.class = &block_class; 388 goto out_free;
375 p->dev.type = &part_type; 389 pdev->devt = devt;
376 p->dev.parent = &disk->dev;
377 disk->part[part-1] = p;
378 390
379 /* delay uevent until 'holders' subdir is created */ 391 /* delay uevent until 'holders' subdir is created */
380 p->dev.uevent_suppress = 1; 392 pdev->uevent_suppress = 1;
381 err = device_add(&p->dev); 393 err = device_add(pdev);
382 if (err) 394 if (err)
383 goto out1; 395 goto out_put;
384 partition_sysfs_add_subdir(p); 396
385 p->dev.uevent_suppress = 0; 397 err = -ENOMEM;
398 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
399 if (!p->holder_dir)
400 goto out_del;
401
402 pdev->uevent_suppress = 0;
386 if (flags & ADDPART_FLAG_WHOLEDISK) { 403 if (flags & ADDPART_FLAG_WHOLEDISK) {
387 err = device_create_file(&p->dev, &dev_attr_whole_disk); 404 err = device_create_file(pdev, &dev_attr_whole_disk);
388 if (err) 405 if (err)
389 goto out2; 406 goto out_del;
390 } 407 }
391 408
409 /* everything is up and running, commence */
410 INIT_RCU_HEAD(&p->rcu_head);
411 rcu_assign_pointer(ptbl->part[partno], p);
412
392 /* suppress uevent if the disk supresses it */ 413 /* suppress uevent if the disk supresses it */
393 if (!disk->dev.uevent_suppress) 414 if (!ddev->uevent_suppress)
394 kobject_uevent(&p->dev.kobj, KOBJ_ADD); 415 kobject_uevent(&pdev->kobj, KOBJ_ADD);
395 416
396 return 0; 417 return 0;
397 418
398out2: 419out_free:
399 device_del(&p->dev);
400out1:
401 put_device(&p->dev);
402 free_part_stats(p);
403out0:
404 kfree(p); 420 kfree(p);
405 return err; 421 return err;
422out_del:
423 kobject_put(p->holder_dir);
424 device_del(pdev);
425out_put:
426 put_device(pdev);
427 blk_free_devt(devt);
428 return err;
406} 429}
407 430
408/* Not exported, helper to add_disk(). */ 431/* Not exported, helper to add_disk(). */
409void register_disk(struct gendisk *disk) 432void register_disk(struct gendisk *disk)
410{ 433{
434 struct device *ddev = disk_to_dev(disk);
411 struct block_device *bdev; 435 struct block_device *bdev;
436 struct disk_part_iter piter;
437 struct hd_struct *part;
412 char *s; 438 char *s;
413 int i;
414 struct hd_struct *p;
415 int err; 439 int err;
416 440
417 disk->dev.parent = disk->driverfs_dev; 441 ddev->parent = disk->driverfs_dev;
418 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
419 442
420 strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); 443 strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE);
421 /* ewww... some of these buggers have / in the name... */ 444 /* ewww... some of these buggers have / in the name... */
422 s = strchr(disk->dev.bus_id, '/'); 445 s = strchr(ddev->bus_id, '/');
423 if (s) 446 if (s)
424 *s = '!'; 447 *s = '!';
425 448
426 /* delay uevents, until we scanned partition table */ 449 /* delay uevents, until we scanned partition table */
427 disk->dev.uevent_suppress = 1; 450 ddev->uevent_suppress = 1;
428 451
429 if (device_add(&disk->dev)) 452 if (device_add(ddev))
430 return; 453 return;
431#ifndef CONFIG_SYSFS_DEPRECATED 454#ifndef CONFIG_SYSFS_DEPRECATED
432 err = sysfs_create_link(block_depr, &disk->dev.kobj, 455 err = sysfs_create_link(block_depr, &ddev->kobj,
433 kobject_name(&disk->dev.kobj)); 456 kobject_name(&ddev->kobj));
434 if (err) { 457 if (err) {
435 device_del(&disk->dev); 458 device_del(ddev);
436 return; 459 return;
437 } 460 }
438#endif 461#endif
439 disk_sysfs_add_subdirs(disk); 462 disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
463 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
440 464
441 /* No minors to use for partitions */ 465 /* No minors to use for partitions */
442 if (disk->minors == 1) 466 if (!disk_partitionable(disk))
443 goto exit; 467 goto exit;
444 468
445 /* No such device (e.g., media were just removed) */ 469 /* No such device (e.g., media were just removed) */
@@ -458,50 +482,66 @@ void register_disk(struct gendisk *disk)
458 482
459exit: 483exit:
460 /* announce disk after possible partitions are created */ 484 /* announce disk after possible partitions are created */
461 disk->dev.uevent_suppress = 0; 485 ddev->uevent_suppress = 0;
462 kobject_uevent(&disk->dev.kobj, KOBJ_ADD); 486 kobject_uevent(&ddev->kobj, KOBJ_ADD);
463 487
464 /* announce possible partitions */ 488 /* announce possible partitions */
465 for (i = 1; i < disk->minors; i++) { 489 disk_part_iter_init(&piter, disk, 0);
466 p = disk->part[i-1]; 490 while ((part = disk_part_iter_next(&piter)))
467 if (!p || !p->nr_sects) 491 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
468 continue; 492 disk_part_iter_exit(&piter);
469 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
470 }
471} 493}
472 494
473int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 495int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
474{ 496{
497 struct disk_part_iter piter;
498 struct hd_struct *part;
475 struct parsed_partitions *state; 499 struct parsed_partitions *state;
476 int p, res; 500 int p, highest, res;
477 501
478 if (bdev->bd_part_count) 502 if (bdev->bd_part_count)
479 return -EBUSY; 503 return -EBUSY;
480 res = invalidate_partition(disk, 0); 504 res = invalidate_partition(disk, 0);
481 if (res) 505 if (res)
482 return res; 506 return res;
483 bdev->bd_invalidated = 0; 507
484 for (p = 1; p < disk->minors; p++) 508 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
485 delete_partition(disk, p); 509 while ((part = disk_part_iter_next(&piter)))
510 delete_partition(disk, part->partno);
511 disk_part_iter_exit(&piter);
512
486 if (disk->fops->revalidate_disk) 513 if (disk->fops->revalidate_disk)
487 disk->fops->revalidate_disk(disk); 514 disk->fops->revalidate_disk(disk);
515 check_disk_size_change(disk, bdev);
516 bdev->bd_invalidated = 0;
488 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 517 if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
489 return 0; 518 return 0;
490 if (IS_ERR(state)) /* I/O error reading the partition table */ 519 if (IS_ERR(state)) /* I/O error reading the partition table */
491 return -EIO; 520 return -EIO;
492 521
493 /* tell userspace that the media / partition table may have changed */ 522 /* tell userspace that the media / partition table may have changed */
494 kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); 523 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
495 524
525 /* Detect the highest partition number and preallocate
526 * disk->part_tbl. This is an optimization and not strictly
527 * necessary.
528 */
529 for (p = 1, highest = 0; p < state->limit; p++)
530 if (state->parts[p].size)
531 highest = p;
532
533 disk_expand_part_tbl(disk, highest);
534
535 /* add partitions */
496 for (p = 1; p < state->limit; p++) { 536 for (p = 1; p < state->limit; p++) {
497 sector_t size = state->parts[p].size; 537 sector_t size = state->parts[p].size;
498 sector_t from = state->parts[p].from; 538 sector_t from = state->parts[p].from;
499 if (!size) 539 if (!size)
500 continue; 540 continue;
501 if (from + size > get_capacity(disk)) { 541 if (from + size > get_capacity(disk)) {
502 printk(KERN_ERR " %s: p%d exceeds device capacity\n", 542 printk(KERN_WARNING
543 "%s: p%d exceeds device capacity\n",
503 disk->disk_name, p); 544 disk->disk_name, p);
504 continue;
505 } 545 }
506 res = add_partition(disk, p, from, size, state->parts[p].flags); 546 res = add_partition(disk, p, from, size, state->parts[p].flags);
507 if (res) { 547 if (res) {
@@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector);
541 581
542void del_gendisk(struct gendisk *disk) 582void del_gendisk(struct gendisk *disk)
543{ 583{
544 int p; 584 struct disk_part_iter piter;
585 struct hd_struct *part;
545 586
546 /* invalidate stuff */ 587 /* invalidate stuff */
547 for (p = disk->minors - 1; p > 0; p--) { 588 disk_part_iter_init(&piter, disk,
548 invalidate_partition(disk, p); 589 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
549 delete_partition(disk, p); 590 while ((part = disk_part_iter_next(&piter))) {
591 invalidate_partition(disk, part->partno);
592 delete_partition(disk, part->partno);
550 } 593 }
594 disk_part_iter_exit(&piter);
595
551 invalidate_partition(disk, 0); 596 invalidate_partition(disk, 0);
552 disk->capacity = 0; 597 blk_free_devt(disk_to_dev(disk)->devt);
598 set_capacity(disk, 0);
553 disk->flags &= ~GENHD_FL_UP; 599 disk->flags &= ~GENHD_FL_UP;
554 unlink_gendisk(disk); 600 unlink_gendisk(disk);
555 disk_stat_set_all(disk, 0); 601 part_stat_set_all(&disk->part0, 0);
556 disk->stamp = 0; 602 disk->part0.stamp = 0;
557 603
558 kobject_put(disk->holder_dir); 604 kobject_put(disk->part0.holder_dir);
559 kobject_put(disk->slave_dir); 605 kobject_put(disk->slave_dir);
560 disk->driverfs_dev = NULL; 606 disk->driverfs_dev = NULL;
561#ifndef CONFIG_SYSFS_DEPRECATED 607#ifndef CONFIG_SYSFS_DEPRECATED
562 sysfs_remove_link(block_depr, disk->dev.bus_id); 608 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
563#endif 609#endif
564 device_del(&disk->dev); 610 device_del(disk_to_dev(disk));
565} 611}
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 17ae8ecd9e8b..98dbe1a84528 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -5,15 +5,13 @@
5 * add_gd_partition adds a partitions details to the devices partition 5 * add_gd_partition adds a partitions details to the devices partition
6 * description. 6 * description.
7 */ 7 */
8enum { MAX_PART = 256 };
9
10struct parsed_partitions { 8struct parsed_partitions {
11 char name[BDEVNAME_SIZE]; 9 char name[BDEVNAME_SIZE];
12 struct { 10 struct {
13 sector_t from; 11 sector_t from;
14 sector_t size; 12 sector_t size;
15 int flags; 13 int flags;
16 } parts[MAX_PART]; 14 } parts[DISK_MAX_PARTS];
17 int next; 15 int next;
18 int limit; 16 int limit;
19}; 17};
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0d6eb33597c6..71c9be59c9c2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
337 return 0; 337 return 0;
338} 338}
339 339
340/*
341 * Use precise platform statistics if available:
342 */
343#ifdef CONFIG_VIRT_CPU_ACCOUNTING
344static cputime_t task_utime(struct task_struct *p)
345{
346 return p->utime;
347}
348
349static cputime_t task_stime(struct task_struct *p)
350{
351 return p->stime;
352}
353#else
354static cputime_t task_utime(struct task_struct *p)
355{
356 clock_t utime = cputime_to_clock_t(p->utime),
357 total = utime + cputime_to_clock_t(p->stime);
358 u64 temp;
359
360 /*
361 * Use CFS's precise accounting:
362 */
363 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
364
365 if (total) {
366 temp *= utime;
367 do_div(temp, total);
368 }
369 utime = (clock_t)temp;
370
371 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
372 return p->prev_utime;
373}
374
375static cputime_t task_stime(struct task_struct *p)
376{
377 clock_t stime;
378
379 /*
380 * Use CFS's precise accounting. (we subtract utime from
381 * the total, to make sure the total observed by userspace
382 * grows monotonically - apps rely on that):
383 */
384 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
385 cputime_to_clock_t(task_utime(p));
386
387 if (stime >= 0)
388 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
389
390 return p->prev_stime;
391}
392#endif
393
394static cputime_t task_gtime(struct task_struct *p)
395{
396 return p->gtime;
397}
398
399static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, 340static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
400 struct pid *pid, struct task_struct *task, int whole) 341 struct pid *pid, struct task_struct *task, int whole)
401{ 342{
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index bca0f81eb687..7821589a17d5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -547,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
547 547
548 for (tmp = dir->subdir; tmp; tmp = tmp->next) 548 for (tmp = dir->subdir; tmp; tmp = tmp->next)
549 if (strcmp(tmp->name, dp->name) == 0) { 549 if (strcmp(tmp->name, dp->name) == 0) {
550 printk(KERN_WARNING "proc_dir_entry '%s' already " 550 printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n",
551 "registered\n", dp->name); 551 dir->name, dp->name);
552 dump_stack(); 552 dump_stack();
553 break; 553 break;
554 } 554 }
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index ded969862960..29e20c6b1f7f 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -24,6 +24,7 @@
24#include <linux/tty.h> 24#include <linux/tty.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/mman.h> 26#include <linux/mman.h>
27#include <linux/quicklist.h>
27#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
28#include <linux/ioport.h> 29#include <linux/ioport.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
@@ -182,6 +183,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
182 "SReclaimable: %8lu kB\n" 183 "SReclaimable: %8lu kB\n"
183 "SUnreclaim: %8lu kB\n" 184 "SUnreclaim: %8lu kB\n"
184 "PageTables: %8lu kB\n" 185 "PageTables: %8lu kB\n"
186#ifdef CONFIG_QUICKLIST
187 "Quicklists: %8lu kB\n"
188#endif
185 "NFS_Unstable: %8lu kB\n" 189 "NFS_Unstable: %8lu kB\n"
186 "Bounce: %8lu kB\n" 190 "Bounce: %8lu kB\n"
187 "WritebackTmp: %8lu kB\n" 191 "WritebackTmp: %8lu kB\n"
@@ -214,6 +218,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
214 K(global_page_state(NR_SLAB_RECLAIMABLE)), 218 K(global_page_state(NR_SLAB_RECLAIMABLE)),
215 K(global_page_state(NR_SLAB_UNRECLAIMABLE)), 219 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
216 K(global_page_state(NR_PAGETABLE)), 220 K(global_page_state(NR_PAGETABLE)),
221#ifdef CONFIG_QUICKLIST
222 K(quicklist_total_size()),
223#endif
217 K(global_page_state(NR_UNSTABLE_NFS)), 224 K(global_page_state(NR_UNSTABLE_NFS)),
218 K(global_page_state(NR_BOUNCE)), 225 K(global_page_state(NR_BOUNCE)),
219 K(global_page_state(NR_WRITEBACK_TEMP)), 226 K(global_page_state(NR_WRITEBACK_TEMP)),
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 52312ec93ff4..5145cb9125af 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = {
58 * size 0 on the assumption that it's going to be used for an mmap of shared 58 * size 0 on the assumption that it's going to be used for an mmap of shared
59 * memory 59 * memory
60 */ 60 */
61static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 61int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
62{ 62{
63 struct pagevec lru_pvec; 63 struct pagevec lru_pvec;
64 unsigned long npages, xpages, loop, limit; 64 unsigned long npages, xpages, loop, limit;
diff --git a/fs/splice.c b/fs/splice.c
index 1bbc6f4bb09c..a1e701c27156 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
898 if (unlikely(!(out->f_mode & FMODE_WRITE))) 898 if (unlikely(!(out->f_mode & FMODE_WRITE)))
899 return -EBADF; 899 return -EBADF;
900 900
901 if (unlikely(out->f_flags & O_APPEND))
902 return -EINVAL;
903
901 ret = rw_verify_area(WRITE, out, ppos, len); 904 ret = rw_verify_area(WRITE, out, ppos, len);
902 if (unlikely(ret < 0)) 905 if (unlikely(ret < 0))
903 return ret; 906 return ret;
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 154098157473..73db464cd08b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
302 int subtract_lebs; 302 int subtract_lebs;
303 long long available; 303 long long available;
304 304
305 /*
306 * Force the amount available to the total size reported if the used
307 * space is zero.
308 */
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
314 return available;
315 }
316
317 available = c->main_bytes - c->lst.total_used; 305 available = c->main_bytes - c->lst.total_used;
318 306
319 /* 307 /*
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
714} 702}
715 703
716/** 704/**
717 * ubifs_budg_get_free_space - return amount of free space. 705 * ubifs_reported_space - calculate reported free space.
706 * @c: the UBIFS file-system description object
707 * @free: amount of free space
708 *
709 * This function calculates amount of free space which will be reported to
710 * user-space. User-space application tend to expect that if the file-system
711 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
712 * are able to write a file of size N. UBIFS attaches node headers to each data
713 * node and it has to write indexind nodes as well. This introduces additional
714 * overhead, and UBIFS it has to report sligtly less free space to meet the
715 * above expectetion.
716 *
717 * This function assumes free space is made up of uncompressed data nodes and
718 * full index nodes (one per data node, tripled because we always allow enough
719 * space to write the index thrice).
720 *
721 * Note, the calculation is pessimistic, which means that most of the time
722 * UBIFS reports less space than it actually has.
723 */
724long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
725{
726 int divisor, factor, f;
727
728 /*
729 * Reported space size is @free * X, where X is UBIFS block size
730 * divided by UBIFS block size + all overhead one data block
731 * introduces. The overhead is the node header + indexing overhead.
732 *
733 * Indexing overhead calculations are based on the following formula:
734 * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
735 * of data nodes, f - fanout. Because effective UBIFS fanout is twice
736 * as less than maximum fanout, we assume that each data node
737 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
738 * Note, the multiplier 3 is because UBIFS reseves thrice as more space
739 * for the index.
740 */
741 f = c->fanout > 3 ? c->fanout >> 1 : 2;
742 factor = UBIFS_BLOCK_SIZE;
743 divisor = UBIFS_MAX_DATA_NODE_SZ;
744 divisor += (c->max_idx_node_sz * 3) / (f - 1);
745 free *= factor;
746 do_div(free, divisor);
747 return free;
748}
749
750/**
751 * ubifs_get_free_space - return amount of free space.
718 * @c: UBIFS file-system description object 752 * @c: UBIFS file-system description object
719 * 753 *
720 * This function returns amount of free space on the file-system. 754 * This function calculates amount of free space to report to user-space.
755 *
756 * Because UBIFS may introduce substantial overhead (the index, node headers,
757 * alighment, wastage at the end of eraseblocks, etc), it cannot report real
758 * amount of free flash space it has (well, because not all dirty space is
759 * reclamable, UBIFS does not actually know the real amount). If UBIFS did so,
760 * it would bread user expectetion about what free space is. Users seem to
761 * accustomed to assume that if the file-system reports N bytes of free space,
762 * they would be able to fit a file of N bytes to the FS. This almost works for
763 * traditional file-systems, because they have way less overhead than UBIFS.
764 * So, to keep users happy, UBIFS tries to take the overhead into account.
721 */ 765 */
722long long ubifs_budg_get_free_space(struct ubifs_info *c) 766long long ubifs_get_free_space(struct ubifs_info *c)
723{ 767{
724 int min_idx_lebs, rsvd_idx_lebs; 768 int min_idx_lebs, rsvd_idx_lebs, lebs;
725 long long available, outstanding, free; 769 long long available, outstanding, free;
726 770
727 /* Do exactly the same calculations as in 'do_budget_space()' */
728 spin_lock(&c->space_lock); 771 spin_lock(&c->space_lock);
729 min_idx_lebs = ubifs_calc_min_idx_lebs(c); 772 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
773 outstanding = c->budg_data_growth + c->budg_dd_growth;
730 774
731 if (min_idx_lebs > c->lst.idx_lebs) 775 /*
732 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; 776 * Force the amount available to the total size reported if the used
733 else 777 * space is zero.
734 rsvd_idx_lebs = 0; 778 */
735 779 if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
736 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
737 - c->lst.taken_empty_lebs) {
738 spin_unlock(&c->space_lock); 780 spin_unlock(&c->space_lock);
739 return 0; 781 return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
740 } 782 }
741 783
742 available = ubifs_calc_available(c, min_idx_lebs); 784 available = ubifs_calc_available(c, min_idx_lebs);
743 outstanding = c->budg_data_growth + c->budg_dd_growth; 785
744 c->min_idx_lebs = min_idx_lebs; 786 /*
787 * When reporting free space to user-space, UBIFS guarantees that it is
788 * possible to write a file of free space size. This means that for
789 * empty LEBs we may use more precise calculations than
790 * 'ubifs_calc_available()' is using. Namely, we know that in empty
791 * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
792 * Thus, amend the available space.
793 *
794 * Note, the calculations below are similar to what we have in
795 * 'do_budget_space()', so refer there for comments.
796 */
797 if (min_idx_lebs > c->lst.idx_lebs)
798 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
799 else
800 rsvd_idx_lebs = 0;
801 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
802 c->lst.taken_empty_lebs;
803 lebs -= rsvd_idx_lebs;
804 available += lebs * (c->dark_wm - c->leb_overhead);
745 spin_unlock(&c->space_lock); 805 spin_unlock(&c->space_lock);
746 806
747 if (available > outstanding) 807 if (available > outstanding)
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index b9cb77473758..d7f7645779f2 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); 538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
539 for (i = 0; i < n; i++) 539 for (i = 0; i < n; i++)
540 printk(KERN_DEBUG "\t ino %llu\n", 540 printk(KERN_DEBUG "\t ino %llu\n",
541 le64_to_cpu(orph->inos[i])); 541 (unsigned long long)le64_to_cpu(orph->inos[i]));
542 break; 542 break;
543 } 543 }
544 default: 544 default:
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 5c96f1fb7016..526c01ec8003 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
426 426
427 while (1) { 427 while (1) {
428 dbg_gen("feed '%s', ino %llu, new f_pos %#x", 428 dbg_gen("feed '%s', ino %llu, new f_pos %#x",
429 dent->name, le64_to_cpu(dent->inum), 429 dent->name, (unsigned long long)le64_to_cpu(dent->inum),
430 key_hash_flash(c, &dent->key)); 430 key_hash_flash(c, &dent->key));
431 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); 431 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
432 432
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
587 if (err) { 587 if (err) {
588 if (err != -ENOSPC) 588 if (err != -ENOSPC)
589 return err; 589 return err;
590 err = 0;
591 budgeted = 0; 590 budgeted = 0;
592 } 591 }
593 592
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 4071d1cae29f..3d698e2022b1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
793 int err; 793 int err;
794 struct ubifs_budget_req req; 794 struct ubifs_budget_req req;
795 loff_t old_size = inode->i_size, new_size = attr->ia_size; 795 loff_t old_size = inode->i_size, new_size = attr->ia_size;
796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1); 796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
797 struct ubifs_inode *ui = ubifs_inode(inode); 797 struct ubifs_inode *ui = ubifs_inode(inode);
798 798
799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); 799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
811 /* A funny way to budget for truncation node */ 811 /* A funny way to budget for truncation node */
812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; 812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
813 err = ubifs_budget_space(c, &req); 813 err = ubifs_budget_space(c, &req);
814 if (err) 814 if (err) {
815 return err; 815 /*
816 * Treat truncations to zero as deletion and always allow them,
817 * just like we do for '->unlink()'.
818 */
819 if (new_size || err != -ENOSPC)
820 return err;
821 budgeted = 0;
822 }
816 823
817 err = vmtruncate(inode, new_size); 824 err = vmtruncate(inode, new_size);
818 if (err) 825 if (err)
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
869 err = ubifs_jnl_truncate(c, inode, old_size, new_size); 876 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
870 mutex_unlock(&ui->ui_mutex); 877 mutex_unlock(&ui->ui_mutex);
871out_budg: 878out_budg:
872 ubifs_release_budget(c, &req); 879 if (budgeted)
880 ubifs_release_budget(c, &req);
881 else {
882 c->nospace = c->nospace_rp = 0;
883 smp_wmb();
884 }
873 return err; 885 return err;
874} 886}
875 887
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index adee7b5ddeab..47814cde2407 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty 211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
212 * or do not have an LEB which satisfies the @min_space criteria. 212 * or do not have an LEB which satisfies the @min_space criteria.
213 * 213 *
214 * Note: 214 * Note, LEBs which have less than dead watermark of free + dirty space are
215 * o LEBs which have less than dead watermark of dirty space are never picked 215 * never picked by this function.
216 * by this function;
217 *
218 * Returns zero and the LEB properties of
219 * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
220 * negative error code in case of other failures. The returned LEB is marked as
221 * "taken".
222 * 216 *
223 * The additional @pick_free argument controls if this function has to return a 217 * The additional @pick_free argument controls if this function has to return a
224 * free or freeable LEB if one is present. For example, GC must to set it to %1, 218 * free or freeable LEB if one is present. For example, GC must to set it to %1,
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
231 * 225 *
232 * In addition @pick_free is set to %2 by the recovery process in order to 226 * In addition @pick_free is set to %2 by the recovery process in order to
233 * recover gc_lnum in which case an index LEB must not be returned. 227 * recover gc_lnum in which case an index LEB must not be returned.
228 *
229 * This function returns zero and the LEB properties of found dirty LEB in case
230 * of success, %-ENOSPC if no dirty LEB was found and a negative error code in
231 * case of other failures. The returned LEB is marked as "taken".
234 */ 232 */
235int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, 233int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
236 int min_space, int pick_free) 234 int min_space, int pick_free)
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
245 int lebs, rsvd_idx_lebs = 0; 243 int lebs, rsvd_idx_lebs = 0;
246 244
247 spin_lock(&c->space_lock); 245 spin_lock(&c->space_lock);
248 lebs = c->lst.empty_lebs; 246 lebs = c->lst.empty_lebs + c->idx_gc_cnt;
249 lebs += c->freeable_cnt - c->lst.taken_empty_lebs; 247 lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
250 248
251 /* 249 /*
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
317 lp = idx_lp; 315 lp = idx_lp;
318 316
319 if (lp) { 317 if (lp) {
320 ubifs_assert(lp->dirty >= c->dead_wm); 318 ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
321 goto found; 319 goto found;
322 } 320 }
323 321
@@ -509,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
509 rsvd_idx_lebs = 0; 507 rsvd_idx_lebs = 0;
510 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
511 c->lst.taken_empty_lebs; 509 c->lst.taken_empty_lebs;
512 ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
513 if (rsvd_idx_lebs < lebs) 510 if (rsvd_idx_lebs < lebs)
514 /* 511 /*
515 * OK to allocate an empty LEB, but we still don't want to go 512 * OK to allocate an empty LEB, but we still don't want to go
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac29081..02aba36fe3d4 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
334 334
335 err = move_nodes(c, sleb); 335 err = move_nodes(c, sleb);
336 if (err) 336 if (err)
337 goto out; 337 goto out_inc_seq;
338 338
339 err = gc_sync_wbufs(c); 339 err = gc_sync_wbufs(c);
340 if (err) 340 if (err)
341 goto out; 341 goto out_inc_seq;
342 342
343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); 343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
344 if (err) 344 if (err)
345 goto out; 345 goto out_inc_seq;
346
347 /* Allow for races with TNC */
348 c->gced_lnum = lnum;
349 smp_wmb();
350 c->gc_seq += 1;
351 smp_wmb();
346 352
347 if (c->gc_lnum == -1) { 353 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum; 354 c->gc_lnum = lnum;
@@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
363out: 369out:
364 ubifs_scan_destroy(sleb); 370 ubifs_scan_destroy(sleb);
365 return err; 371 return err;
372
373out_inc_seq:
374 /* We may have moved at least some nodes so allow for races with TNC */
375 c->gced_lnum = lnum;
376 smp_wmb();
377 c->gc_seq += 1;
378 smp_wmb();
379 goto out;
366} 380}
367 381
368/** 382/**
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 87dabf9fe742..4c12a9215d7f 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
284} 284}
285 285
286/** 286/**
287 * ubifs_reported_space - calculate reported free space.
288 * @c: the UBIFS file-system description object
289 * @free: amount of free space
290 *
291 * This function calculates amount of free space which will be reported to
292 * user-space. User-space application tend to expect that if the file-system
293 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
294 * are able to write a file of size N. UBIFS attaches node headers to each data
295 * node and it has to write indexind nodes as well. This introduces additional
296 * overhead, and UBIFS it has to report sligtly less free space to meet the
297 * above expectetion.
298 *
299 * This function assumes free space is made up of uncompressed data nodes and
300 * full index nodes (one per data node, doubled because we always allow enough
301 * space to write the index twice).
302 *
303 * Note, the calculation is pessimistic, which means that most of the time
304 * UBIFS reports less space than it actually has.
305 */
306static inline long long ubifs_reported_space(const struct ubifs_info *c,
307 uint64_t free)
308{
309 int divisor, factor;
310
311 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
312 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
313 do_div(free, divisor);
314
315 return free * factor;
316}
317
318/**
319 * ubifs_current_time - round current time to time granularity. 287 * ubifs_current_time - round current time to time granularity.
320 * @inode: inode 288 * @inode: inode
321 */ 289 */
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
325 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 293 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
326} 294}
327 295
296/**
297 * ubifs_tnc_lookup - look up a file-system node.
298 * @c: UBIFS file-system description object
299 * @key: node key to lookup
300 * @node: the node is returned here
301 *
302 * This function look up and reads node with key @key. The caller has to make
303 * sure the @node buffer is large enough to fit the node. Returns zero in case
304 * of success, %-ENOENT if the node was not found, and a negative error code in
305 * case of failure.
306 */
307static inline int ubifs_tnc_lookup(struct ubifs_info *c,
308 const union ubifs_key *key, void *node)
309{
310 return ubifs_tnc_locate(c, key, node, NULL, NULL);
311}
312
328#endif /* __UBIFS_MISC_H__ */ 313#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f71e6b8822c4..3f4902060c7a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
370{ 370{
371 struct ubifs_info *c = dentry->d_sb->s_fs_info; 371 struct ubifs_info *c = dentry->d_sb->s_fs_info;
372 unsigned long long free; 372 unsigned long long free;
373 __le32 *uuid = (__le32 *)c->uuid;
373 374
374 free = ubifs_budg_get_free_space(c); 375 free = ubifs_get_free_space(c);
375 dbg_gen("free space %lld bytes (%lld blocks)", 376 dbg_gen("free space %lld bytes (%lld blocks)",
376 free, free >> UBIFS_BLOCK_SHIFT); 377 free, free >> UBIFS_BLOCK_SHIFT);
377 378
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
386 buf->f_files = 0; 387 buf->f_files = 0;
387 buf->f_ffree = 0; 388 buf->f_ffree = 0;
388 buf->f_namelen = UBIFS_MAX_NLEN; 389 buf->f_namelen = UBIFS_MAX_NLEN;
389 390 buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
391 buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
390 return 0; 392 return 0;
391} 393}
392 394
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c)
530 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); 532 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
531 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); 533 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
532 534
535 /*
536 * Calculate how many bytes would be wasted at the end of LEB if it was
537 * fully filled with data nodes of maximum size. This is used in
538 * calculations when reporting free space.
539 */
540 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
533 return 0; 541 return 0;
534} 542}
535 543
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c)
647 * internally because it does not make much sense for UBIFS, but it is 655 * internally because it does not make much sense for UBIFS, but it is
648 * necessary to report something for the 'statfs()' call. 656 * necessary to report something for the 'statfs()' call.
649 * 657 *
650 * Subtract the LEB reserved for GC and the LEB which is reserved for 658 * Subtract the LEB reserved for GC, the LEB which is reserved for
651 * deletions. 659 * deletions, and assume only one journal head is available.
652 *
653 * Review 'ubifs_calc_available()' if changing this calculation.
654 */ 660 */
655 tmp64 = c->main_lebs - 2; 661 tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
656 tmp64 *= (uint64_t)c->leb_size - c->dark_wm; 662 tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
657 tmp64 = ubifs_reported_space(c, tmp64); 663 tmp64 = ubifs_reported_space(c, tmp64);
658 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; 664 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
659 665
@@ -1018,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c)
1018 goto out_dereg; 1024 goto out_dereg;
1019 } 1025 }
1020 1026
1027 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
1021 if (!mounted_read_only) { 1028 if (!mounted_read_only) {
1022 err = alloc_wbufs(c); 1029 err = alloc_wbufs(c);
1023 if (err) 1030 if (err)
1024 goto out_cbuf; 1031 goto out_cbuf;
1025 1032
1026 /* Create background thread */ 1033 /* Create background thread */
1027 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
1028 c->vi.vol_id);
1029 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); 1034 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1030 if (!c->bgt) 1035 if (!c->bgt)
1031 c->bgt = ERR_PTR(-EINVAL); 1036 c->bgt = ERR_PTR(-EINVAL);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e909f4a96443..7634c5970887 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
506 if (keys_cmp(c, key, &node_key) != 0) 506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0; 507 ret = 0;
508 } 508 }
509 if (ret == 0) 509 if (ret == 0 && c->replaying)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s", 510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); 511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret; 512 return ret;
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1382} 1382}
1383 1383
1384/** 1384/**
1385 * ubifs_tnc_lookup - look up a file-system node. 1385 * maybe_leb_gced - determine if a LEB may have been garbage collected.
1386 * @c: UBIFS file-system description object 1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup 1387 * @lnum: LEB number
1388 * @node: the node is returned here 1388 * @gc_seq1: garbage collection sequence number
1389 * 1389 *
1390 * This function look up and reads node with key @key. The caller has to make 1390 * This function determines if @lnum may have been garbage collected since
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case 1391 * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
1392 * of success, %-ENOENT if the node was not found, and a negative error code in 1392 * %0 is returned.
1393 * case of failure.
1394 */ 1393 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, 1394static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1396 void *node)
1397{ 1395{
1398 int found, n, err; 1396 int gc_seq2, gced_lnum;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401 1397
1402 mutex_lock(&c->tnc_mutex); 1398 gced_lnum = c->gced_lnum;
1403 found = ubifs_lookup_level0(c, key, &znode, &n); 1399 smp_rmb();
1404 if (!found) { 1400 gc_seq2 = c->gc_seq;
1405 err = -ENOENT; 1401 /* Same seq means no GC */
1406 goto out; 1402 if (gc_seq1 == gc_seq2)
1407 } else if (found < 0) { 1403 return 0;
1408 err = found; 1404 /* Different by more than 1 means we don't know */
1409 goto out; 1405 if (gc_seq1 + 1 != gc_seq2)
1410 } 1406 return 1;
1411 zt = &znode->zbranch[n]; 1407 /*
1412 if (is_hash_key(c, key)) { 1408 * We have seen the sequence number has increased by 1. Now we need to
1413 /* 1409 * be sure we read the right LEB number, so read it again.
1414 * In this case the leaf node cache gets used, so we pass the 1410 */
1415 * address of the zbranch and keep the mutex locked 1411 smp_rmb();
1416 */ 1412 if (gced_lnum != c->gced_lnum)
1417 err = tnc_read_node_nm(c, zt, node); 1413 return 1;
1418 goto out; 1414 /* Finally we can check lnum */
1419 } 1415 if (gced_lnum == lnum)
1420 zbr = znode->zbranch[n]; 1416 return 1;
1421 mutex_unlock(&c->tnc_mutex); 1417 return 0;
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425
1426out:
1427 mutex_unlock(&c->tnc_mutex);
1428 return err;
1429} 1418}
1430 1419
1431/** 1420/**
@@ -1436,16 +1425,19 @@ out:
1436 * @lnum: LEB number is returned here 1425 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here 1426 * @offs: offset is returned here
1438 * 1427 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node 1428 * This function look up and reads node with key @key. The caller has to make
1440 * location also. See 'ubifs_tnc_lookup()'. 1429 * sure the @node buffer is large enough to fit the node. Returns zero in case
1430 * of success, %-ENOENT if the node was not found, and a negative error code in
1431 * case of failure. The node location can be returned in @lnum and @offs.
1441 */ 1432 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1433int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs) 1434 void *node, int *lnum, int *offs)
1444{ 1435{
1445 int found, n, err; 1436 int found, n, err, safely = 0, gc_seq1;
1446 struct ubifs_znode *znode; 1437 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt; 1438 struct ubifs_zbranch zbr, *zt;
1448 1439
1440again:
1449 mutex_lock(&c->tnc_mutex); 1441 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n); 1442 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) { 1443 if (!found) {
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1456 goto out; 1448 goto out;
1457 } 1449 }
1458 zt = &znode->zbranch[n]; 1450 zt = &znode->zbranch[n];
1451 if (lnum) {
1452 *lnum = zt->lnum;
1453 *offs = zt->offs;
1454 }
1459 if (is_hash_key(c, key)) { 1455 if (is_hash_key(c, key)) {
1460 /* 1456 /*
1461 * In this case the leaf node cache gets used, so we pass the 1457 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked 1458 * address of the zbranch and keep the mutex locked
1463 */ 1459 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node); 1460 err = tnc_read_node_nm(c, zt, node);
1467 goto out; 1461 goto out;
1468 } 1462 }
1463 if (safely) {
1464 err = ubifs_tnc_read_node(c, zt, node);
1465 goto out;
1466 }
1467 /* Drop the TNC mutex prematurely and race with garbage collection */
1469 zbr = znode->zbranch[n]; 1468 zbr = znode->zbranch[n];
1469 gc_seq1 = c->gc_seq;
1470 mutex_unlock(&c->tnc_mutex); 1470 mutex_unlock(&c->tnc_mutex);
1471 1471
1472 *lnum = zbr.lnum; 1472 if (ubifs_get_wbuf(c, zbr.lnum)) {
1473 *offs = zbr.offs; 1473 /* We do not GC journal heads */
1474 err = ubifs_tnc_read_node(c, &zbr, node);
1475 return err;
1476 }
1474 1477
1475 err = ubifs_tnc_read_node(c, &zbr, node); 1478 err = fallible_read_node(c, key, &zbr, node);
1476 return err; 1479 if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
1480 /*
1481 * The node may have been GC'ed out from under us so try again
1482 * while keeping the TNC mutex locked.
1483 */
1484 safely = 1;
1485 goto again;
1486 }
1487 return 0;
1477 1488
1478out: 1489out:
1479 mutex_unlock(&c->tnc_mutex); 1490 mutex_unlock(&c->tnc_mutex);
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1498{ 1509{
1499 int found, n, err; 1510 int found, n, err;
1500 struct ubifs_znode *znode; 1511 struct ubifs_znode *znode;
1501 struct ubifs_zbranch zbr;
1502 1512
1503 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); 1513 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
1504 mutex_lock(&c->tnc_mutex); 1514 mutex_lock(&c->tnc_mutex);
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1522 goto out_unlock; 1532 goto out_unlock;
1523 } 1533 }
1524 1534
1525 zbr = znode->zbranch[n]; 1535 err = tnc_read_node_nm(c, &znode->zbranch[n], node);
1526 mutex_unlock(&c->tnc_mutex);
1527
1528 err = tnc_read_node_nm(c, &zbr, node);
1529 return err;
1530 1536
1531out_unlock: 1537out_unlock:
1532 mutex_unlock(&c->tnc_mutex); 1538 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index bd2121f3426e..a9ecbd9af20d 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -87,7 +87,7 @@
87#define UBIFS_SK_LEN 8 87#define UBIFS_SK_LEN 8
88 88
89/* Minimum index tree fanout */ 89/* Minimum index tree fanout */
90#define UBIFS_MIN_FANOUT 2 90#define UBIFS_MIN_FANOUT 3
91 91
92/* Maximum number of levels in UBIFS indexing B-tree */ 92/* Maximum number of levels in UBIFS indexing B-tree */
93#define UBIFS_MAX_LEVELS 512 93#define UBIFS_MAX_LEVELS 512
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d7f706f7a302..17c620b93eec 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -995,6 +995,9 @@ struct ubifs_mount_opts {
995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
996 * @max_inode_sz: maximum possible inode size in bytes 996 * @max_inode_sz: maximum possible inode size in bytes
997 * @max_znode_sz: size of znode in bytes 997 * @max_znode_sz: size of znode in bytes
998 *
999 * @leb_overhead: how many bytes are wasted in an LEB when it is filled with
1000 * data nodes of maximum size - used in free space reporting
998 * @dead_wm: LEB dead space watermark 1001 * @dead_wm: LEB dead space watermark
999 * @dark_wm: LEB dark space watermark 1002 * @dark_wm: LEB dark space watermark
1000 * @block_cnt: count of 4KiB blocks on the FS 1003 * @block_cnt: count of 4KiB blocks on the FS
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts {
1028 * @sbuf: a buffer of LEB size used by GC and replay for scanning 1031 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1029 * @idx_gc: list of index LEBs that have been garbage collected 1032 * @idx_gc: list of index LEBs that have been garbage collected
1030 * @idx_gc_cnt: number of elements on the idx_gc list 1033 * @idx_gc_cnt: number of elements on the idx_gc list
1034 * @gc_seq: incremented for every non-index LEB garbage collected
1035 * @gced_lnum: last non-index LEB that was garbage collected
1031 * 1036 *
1032 * @infos_list: links all 'ubifs_info' objects 1037 * @infos_list: links all 'ubifs_info' objects
1033 * @umount_mutex: serializes shrinker and un-mount 1038 * @umount_mutex: serializes shrinker and un-mount
@@ -1224,6 +1229,8 @@ struct ubifs_info {
1224 int max_idx_node_sz; 1229 int max_idx_node_sz;
1225 long long max_inode_sz; 1230 long long max_inode_sz;
1226 int max_znode_sz; 1231 int max_znode_sz;
1232
1233 int leb_overhead;
1227 int dead_wm; 1234 int dead_wm;
1228 int dark_wm; 1235 int dark_wm;
1229 int block_cnt; 1236 int block_cnt;
@@ -1257,6 +1264,8 @@ struct ubifs_info {
1257 void *sbuf; 1264 void *sbuf;
1258 struct list_head idx_gc; 1265 struct list_head idx_gc;
1259 int idx_gc_cnt; 1266 int idx_gc_cnt;
1267 volatile int gc_seq;
1268 volatile int gced_lnum;
1260 1269
1261 struct list_head infos_list; 1270 struct list_head infos_list;
1262 struct mutex umount_mutex; 1271 struct mutex umount_mutex;
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1434 struct ubifs_budget_req *req); 1443 struct ubifs_budget_req *req);
1435void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, 1444void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1436 struct ubifs_budget_req *req); 1445 struct ubifs_budget_req *req);
1437long long ubifs_budg_get_free_space(struct ubifs_info *c); 1446long long ubifs_get_free_space(struct ubifs_info *c);
1438int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1447int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1439void ubifs_convert_page_budget(struct ubifs_info *c); 1448void ubifs_convert_page_budget(struct ubifs_info *c);
1449long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
1440long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); 1450long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1441 1451
1442/* find.c */ 1452/* find.c */
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1451/* tnc.c */ 1461/* tnc.c */
1452int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, 1462int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1453 struct ubifs_znode **zn, int *n); 1463 struct ubifs_znode **zn, int *n);
1454int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1455 void *node);
1456int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, 1464int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 void *node, const struct qstr *nm); 1465 void *node, const struct qstr *nm);
1458int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1466int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 0ed6e146a0d9..eb91f3b70320 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = {
211 .release = udf_release_file, 211 .release = udf_release_file,
212 .fsync = udf_fsync_file, 212 .fsync = udf_fsync_file,
213 .splice_read = generic_file_splice_read, 213 .splice_read = generic_file_splice_read,
214 .llseek = generic_file_llseek,
214}; 215};
215 216
216const struct inode_operations udf_file_inode_operations = { 217const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index eb9cfa23dc3d..a4f2b3ce45b0 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
76 *err = -ENOSPC; 76 *err = -ENOSPC;
77 77
78 iinfo = UDF_I(inode); 78 iinfo = UDF_I(inode);
79 iinfo->i_unique = 0; 79 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) {
80 iinfo->i_lenExtents = 0; 80 iinfo->i_efe = 1;
81 iinfo->i_next_alloc_block = 0; 81 if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev)
82 iinfo->i_next_alloc_goal = 0; 82 sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE;
83 iinfo->i_strat4096 = 0; 83 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
84 sizeof(struct extendedFileEntry),
85 GFP_KERNEL);
86 } else {
87 iinfo->i_efe = 0;
88 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
89 sizeof(struct fileEntry),
90 GFP_KERNEL);
91 }
92 if (!iinfo->i_ext.i_data) {
93 iput(inode);
94 *err = -ENOMEM;
95 return NULL;
96 }
84 97
85 block = udf_new_block(dir->i_sb, NULL, 98 block = udf_new_block(dir->i_sb, NULL,
86 dinfo->i_location.partitionReferenceNum, 99 dinfo->i_location.partitionReferenceNum,
@@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
111 lvhd->uniqueID = cpu_to_le64(uniqueID); 124 lvhd->uniqueID = cpu_to_le64(uniqueID);
112 mark_buffer_dirty(sbi->s_lvid_bh); 125 mark_buffer_dirty(sbi->s_lvid_bh);
113 } 126 }
127 mutex_unlock(&sbi->s_alloc_mutex);
114 inode->i_mode = mode; 128 inode->i_mode = mode;
115 inode->i_uid = current->fsuid; 129 inode->i_uid = current->fsuid;
116 if (dir->i_mode & S_ISGID) { 130 if (dir->i_mode & S_ISGID) {
@@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
129 iinfo->i_lenEAttr = 0; 143 iinfo->i_lenEAttr = 0;
130 iinfo->i_lenAlloc = 0; 144 iinfo->i_lenAlloc = 0;
131 iinfo->i_use = 0; 145 iinfo->i_use = 0;
132 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) {
133 iinfo->i_efe = 1;
134 if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev)
135 sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE;
136 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
137 sizeof(struct extendedFileEntry),
138 GFP_KERNEL);
139 } else {
140 iinfo->i_efe = 0;
141 iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
142 sizeof(struct fileEntry),
143 GFP_KERNEL);
144 }
145 if (!iinfo->i_ext.i_data) {
146 iput(inode);
147 *err = -ENOMEM;
148 mutex_unlock(&sbi->s_alloc_mutex);
149 return NULL;
150 }
151 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) 146 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB))
152 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; 147 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
153 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) 148 else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
@@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
158 iinfo->i_crtime = current_fs_time(inode->i_sb); 153 iinfo->i_crtime = current_fs_time(inode->i_sb);
159 insert_inode_hash(inode); 154 insert_inode_hash(inode);
160 mark_inode_dirty(inode); 155 mark_inode_dirty(inode);
161 mutex_unlock(&sbi->s_alloc_mutex);
162 156
163 if (DQUOT_ALLOC_INODE(inode)) { 157 if (DQUOT_ALLOC_INODE(inode)) {
164 DQUOT_DROP(inode); 158 DQUOT_DROP(inode);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index f42f80a3b1fa..a44d68eb50b5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1338,6 +1338,10 @@ __xfs_get_blocks(
1338 offset = (xfs_off_t)iblock << inode->i_blkbits; 1338 offset = (xfs_off_t)iblock << inode->i_blkbits;
1339 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1339 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
1340 size = bh_result->b_size; 1340 size = bh_result->b_size;
1341
1342 if (!create && direct && offset >= i_size_read(inode))
1343 return 0;
1344
1341 error = xfs_iomap(XFS_I(inode), offset, size, 1345 error = xfs_iomap(XFS_I(inode), offset, size,
1342 create ? flags : BMAPI_READ, &iomap, &niomap); 1346 create ? flags : BMAPI_READ, &iomap, &niomap);
1343 if (error) 1347 if (error)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 986061ae1b9b..36d5fcd3f593 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1001,12 +1001,13 @@ xfs_buf_iodone_work(
1001 * We can get an EOPNOTSUPP to ordered writes. Here we clear the 1001 * We can get an EOPNOTSUPP to ordered writes. Here we clear the
1002 * ordered flag and reissue them. Because we can't tell the higher 1002 * ordered flag and reissue them. Because we can't tell the higher
1003 * layers directly that they should not issue ordered I/O anymore, they 1003 * layers directly that they should not issue ordered I/O anymore, they
1004 * need to check if the ordered flag was cleared during I/O completion. 1004 * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
1005 */ 1005 */
1006 if ((bp->b_error == EOPNOTSUPP) && 1006 if ((bp->b_error == EOPNOTSUPP) &&
1007 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { 1007 (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
1008 XB_TRACE(bp, "ordered_retry", bp->b_iodone); 1008 XB_TRACE(bp, "ordered_retry", bp->b_iodone);
1009 bp->b_flags &= ~XBF_ORDERED; 1009 bp->b_flags &= ~XBF_ORDERED;
1010 bp->b_flags |= _XFS_BARRIER_FAILED;
1010 xfs_buf_iorequest(bp); 1011 xfs_buf_iorequest(bp);
1011 } else if (bp->b_iodone) 1012 } else if (bp->b_iodone)
1012 (*(bp->b_iodone))(bp); 1013 (*(bp->b_iodone))(bp);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index fe0109956656..456519a088c7 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -85,6 +85,14 @@ typedef enum {
85 * modifications being lost. 85 * modifications being lost.
86 */ 86 */
87 _XBF_PAGE_LOCKED = (1 << 22), 87 _XBF_PAGE_LOCKED = (1 << 22),
88
89 /*
90 * If we try a barrier write, but it fails we have to communicate
91 * this to the upper layers. Unfortunately b_error gets overwritten
92 * when the buffer is re-issued so we have to add another flag to
93 * keep this information.
94 */
95 _XFS_BARRIER_FAILED = (1 << 23),
88} xfs_buf_flags_t; 96} xfs_buf_flags_t;
89 97
90typedef enum { 98typedef enum {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 73c65f19e549..18d3c8487835 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1302,9 +1302,29 @@ xfs_fs_remount(
1302 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1302 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1303 break; 1303 break;
1304 default: 1304 default:
1305 /*
1306 * Logically we would return an error here to prevent
1307 * users from believing they might have changed
1308 * mount options using remount which can't be changed.
1309 *
1310 * But unfortunately mount(8) adds all options from
1311 * mtab and fstab to the mount arguments in some cases
1312 * so we can't blindly reject options, but have to
1313 * check for each specified option if it actually
1314 * differs from the currently set option and only
1315 * reject it if that's the case.
1316 *
1317 * Until that is implemented we return success for
1318 * every remount request, and silently ignore all
1319 * options that we can't actually change.
1320 */
1321#if 0
1305 printk(KERN_INFO 1322 printk(KERN_INFO
1306 "XFS: mount option \"%s\" not supported for remount\n", p); 1323 "XFS: mount option \"%s\" not supported for remount\n", p);
1307 return -EINVAL; 1324 return -EINVAL;
1325#else
1326 return 0;
1327#endif
1308 } 1328 }
1309 } 1329 }
1310 1330
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 608c30c3f76b..002fc2617c8e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -732,6 +732,7 @@ xfs_buf_item_init(
732 bip->bli_item.li_ops = &xfs_buf_item_ops; 732 bip->bli_item.li_ops = &xfs_buf_item_ops;
733 bip->bli_item.li_mountp = mp; 733 bip->bli_item.li_mountp = mp;
734 bip->bli_buf = bp; 734 bip->bli_buf = bp;
735 xfs_buf_hold(bp);
735 bip->bli_format.blf_type = XFS_LI_BUF; 736 bip->bli_format.blf_type = XFS_LI_BUF;
736 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); 737 bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
737 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); 738 bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
@@ -867,6 +868,21 @@ xfs_buf_item_dirty(
867 return (bip->bli_flags & XFS_BLI_DIRTY); 868 return (bip->bli_flags & XFS_BLI_DIRTY);
868} 869}
869 870
871STATIC void
872xfs_buf_item_free(
873 xfs_buf_log_item_t *bip)
874{
875#ifdef XFS_TRANS_DEBUG
876 kmem_free(bip->bli_orig);
877 kmem_free(bip->bli_logged);
878#endif /* XFS_TRANS_DEBUG */
879
880#ifdef XFS_BLI_TRACE
881 ktrace_free(bip->bli_trace);
882#endif
883 kmem_zone_free(xfs_buf_item_zone, bip);
884}
885
870/* 886/*
871 * This is called when the buf log item is no longer needed. It should 887 * This is called when the buf log item is no longer needed. It should
872 * free the buf log item associated with the given buffer and clear 888 * free the buf log item associated with the given buffer and clear
@@ -887,18 +903,8 @@ xfs_buf_item_relse(
887 (XFS_BUF_IODONE_FUNC(bp) != NULL)) { 903 (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
888 XFS_BUF_CLR_IODONE_FUNC(bp); 904 XFS_BUF_CLR_IODONE_FUNC(bp);
889 } 905 }
890 906 xfs_buf_rele(bp);
891#ifdef XFS_TRANS_DEBUG 907 xfs_buf_item_free(bip);
892 kmem_free(bip->bli_orig);
893 bip->bli_orig = NULL;
894 kmem_free(bip->bli_logged);
895 bip->bli_logged = NULL;
896#endif /* XFS_TRANS_DEBUG */
897
898#ifdef XFS_BLI_TRACE
899 ktrace_free(bip->bli_trace);
900#endif
901 kmem_zone_free(xfs_buf_item_zone, bip);
902} 908}
903 909
904 910
@@ -1120,6 +1126,7 @@ xfs_buf_iodone(
1120 1126
1121 ASSERT(bip->bli_buf == bp); 1127 ASSERT(bip->bli_buf == bp);
1122 1128
1129 xfs_buf_rele(bp);
1123 mp = bip->bli_item.li_mountp; 1130 mp = bip->bli_item.li_mountp;
1124 1131
1125 /* 1132 /*
@@ -1136,18 +1143,7 @@ xfs_buf_iodone(
1136 * xfs_trans_delete_ail() drops the AIL lock. 1143 * xfs_trans_delete_ail() drops the AIL lock.
1137 */ 1144 */
1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 1145 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1139 1146 xfs_buf_item_free(bip);
1140#ifdef XFS_TRANS_DEBUG
1141 kmem_free(bip->bli_orig);
1142 bip->bli_orig = NULL;
1143 kmem_free(bip->bli_logged);
1144 bip->bli_logged = NULL;
1145#endif /* XFS_TRANS_DEBUG */
1146
1147#ifdef XFS_BLI_TRACE
1148 ktrace_free(bip->bli_trace);
1149#endif
1150 kmem_zone_free(xfs_buf_item_zone, bip);
1151} 1147}
1152 1148
1153#if defined(XFS_BLI_TRACE) 1149#if defined(XFS_BLI_TRACE)
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 760f4c5b5160..75b0cd4da0ea 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -149,7 +149,14 @@ xfs_swap_extents(
149 149
150 sbp = &sxp->sx_stat; 150 sbp = &sxp->sx_stat;
151 151
152 xfs_lock_two_inodes(ip, tip, lock_flags); 152 /*
153 * we have to do two separate lock calls here to keep lockdep
154 * happy. If we try to get all the locks in one call, lock will
155 * report false positives when we drop the ILOCK and regain them
156 * below.
157 */
158 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
159 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
153 locked = 1; 160 locked = 1;
154 161
155 /* Verify that both files have the same format */ 162 /* Verify that both files have the same format */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 00e80df9dd9d..dbd9cef852ec 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct(
4118 ASSERT(nextents <= XFS_LINEAR_EXTS); 4118 ASSERT(nextents <= XFS_LINEAR_EXTS);
4119 size = nextents * sizeof(xfs_bmbt_rec_t); 4119 size = nextents * sizeof(xfs_bmbt_rec_t);
4120 4120
4121 xfs_iext_irec_compact_full(ifp); 4121 xfs_iext_irec_compact_pages(ifp);
4122 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 4122 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
4123 4123
4124 ep = ifp->if_u1.if_ext_irec->er_extbuf; 4124 ep = ifp->if_u1.if_ext_irec->er_extbuf;
@@ -4449,8 +4449,7 @@ xfs_iext_irec_remove(
4449 * compaction policy is as follows: 4449 * compaction policy is as follows:
4450 * 4450 *
4451 * Full Compaction: Extents fit into a single page (or inline buffer) 4451 * Full Compaction: Extents fit into a single page (or inline buffer)
4452 * Full Compaction: Extents occupy less than 10% of allocated space 4452 * Partial Compaction: Extents occupy less than 50% of allocated space
4453 * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
4454 * No Compaction: Extents occupy at least 50% of allocated space 4453 * No Compaction: Extents occupy at least 50% of allocated space
4455 */ 4454 */
4456void 4455void
@@ -4471,8 +4470,6 @@ xfs_iext_irec_compact(
4471 xfs_iext_direct_to_inline(ifp, nextents); 4470 xfs_iext_direct_to_inline(ifp, nextents);
4472 } else if (nextents <= XFS_LINEAR_EXTS) { 4471 } else if (nextents <= XFS_LINEAR_EXTS) {
4473 xfs_iext_indirect_to_direct(ifp); 4472 xfs_iext_indirect_to_direct(ifp);
4474 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
4475 xfs_iext_irec_compact_full(ifp);
4476 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { 4473 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
4477 xfs_iext_irec_compact_pages(ifp); 4474 xfs_iext_irec_compact_pages(ifp);
4478 } 4475 }
@@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages(
4496 erp_next = erp + 1; 4493 erp_next = erp + 1;
4497 if (erp_next->er_extcount <= 4494 if (erp_next->er_extcount <=
4498 (XFS_LINEAR_EXTS - erp->er_extcount)) { 4495 (XFS_LINEAR_EXTS - erp->er_extcount)) {
4499 memmove(&erp->er_extbuf[erp->er_extcount], 4496 memcpy(&erp->er_extbuf[erp->er_extcount],
4500 erp_next->er_extbuf, erp_next->er_extcount * 4497 erp_next->er_extbuf, erp_next->er_extcount *
4501 sizeof(xfs_bmbt_rec_t)); 4498 sizeof(xfs_bmbt_rec_t));
4502 erp->er_extcount += erp_next->er_extcount; 4499 erp->er_extcount += erp_next->er_extcount;
@@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages(
4516} 4513}
4517 4514
4518/* 4515/*
4519 * Fully compact the extent records managed by the indirection array.
4520 */
4521void
4522xfs_iext_irec_compact_full(
4523 xfs_ifork_t *ifp) /* inode fork pointer */
4524{
4525 xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */
4526 xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */
4527 int erp_idx = 0; /* extent irec index */
4528 int ext_avail; /* empty entries in ex list */
4529 int ext_diff; /* number of exts to add */
4530 int nlists; /* number of irec's (ex lists) */
4531
4532 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4533
4534 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4535 erp = ifp->if_u1.if_ext_irec;
4536 ep = &erp->er_extbuf[erp->er_extcount];
4537 erp_next = erp + 1;
4538 ep_next = erp_next->er_extbuf;
4539
4540 while (erp_idx < nlists - 1) {
4541 /*
4542 * Check how many extent records are available in this irec.
4543 * If there is none skip the whole exercise.
4544 */
4545 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
4546 if (ext_avail) {
4547
4548 /*
4549 * Copy over as many as possible extent records into
4550 * the previous page.
4551 */
4552 ext_diff = MIN(ext_avail, erp_next->er_extcount);
4553 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
4554 erp->er_extcount += ext_diff;
4555 erp_next->er_extcount -= ext_diff;
4556
4557 /*
4558 * If the next irec is empty now we can simply
4559 * remove it.
4560 */
4561 if (erp_next->er_extcount == 0) {
4562 /*
4563 * Free page before removing extent record
4564 * so er_extoffs don't get modified in
4565 * xfs_iext_irec_remove.
4566 */
4567 kmem_free(erp_next->er_extbuf);
4568 erp_next->er_extbuf = NULL;
4569 xfs_iext_irec_remove(ifp, erp_idx + 1);
4570 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4571 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4572
4573 /*
4574 * If the next irec is not empty move up the content
4575 * that has not been copied to the previous page to
4576 * the beggining of this one.
4577 */
4578 } else {
4579 memmove(erp_next->er_extbuf, &ep_next[ext_diff],
4580 erp_next->er_extcount *
4581 sizeof(xfs_bmbt_rec_t));
4582 ep_next = erp_next->er_extbuf;
4583 memset(&ep_next[erp_next->er_extcount], 0,
4584 (XFS_LINEAR_EXTS -
4585 erp_next->er_extcount) *
4586 sizeof(xfs_bmbt_rec_t));
4587 }
4588 }
4589
4590 if (erp->er_extcount == XFS_LINEAR_EXTS) {
4591 erp_idx++;
4592 if (erp_idx < nlists)
4593 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4594 else
4595 break;
4596 }
4597 ep = &erp->er_extbuf[erp->er_extcount];
4598 erp_next = erp + 1;
4599 ep_next = erp_next->er_extbuf;
4600 }
4601}
4602
4603/*
4604 * This is called to update the er_extoff field in the indirection 4516 * This is called to update the er_extoff field in the indirection
4605 * array when extents have been added or removed from one of the 4517 * array when extents have been added or removed from one of the
4606 * extent lists. erp_idx contains the irec index to begin updating 4518 * extent lists. erp_idx contains the irec index to begin updating
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ccba14eb9dbe..0b02c6443551 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
124STATIC int xlog_iclogs_empty(xlog_t *log); 124STATIC int xlog_iclogs_empty(xlog_t *log);
125 125
126#if defined(XFS_LOG_TRACE) 126#if defined(XFS_LOG_TRACE)
127
128#define XLOG_TRACE_LOGGRANT_SIZE 2048
129#define XLOG_TRACE_ICLOG_SIZE 256
130
131void
132xlog_trace_loggrant_alloc(xlog_t *log)
133{
134 log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
135}
136
137void
138xlog_trace_loggrant_dealloc(xlog_t *log)
139{
140 ktrace_free(log->l_grant_trace);
141}
142
127void 143void
128xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) 144xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
129{ 145{
130 unsigned long cnts; 146 unsigned long cnts;
131 147
132 if (!log->l_grant_trace) {
133 log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
134 if (!log->l_grant_trace)
135 return;
136 }
137 /* ticket counts are 1 byte each */ 148 /* ticket counts are 1 byte each */
138 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; 149 cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
139 150
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
157} 168}
158 169
159void 170void
171xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
172{
173 iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
174}
175
176void
177xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
178{
179 ktrace_free(iclog->ic_trace);
180}
181
182void
160xlog_trace_iclog(xlog_in_core_t *iclog, uint state) 183xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
161{ 184{
162 if (!iclog->ic_trace)
163 iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
164 ktrace_enter(iclog->ic_trace, 185 ktrace_enter(iclog->ic_trace,
165 (void *)((unsigned long)state), 186 (void *)((unsigned long)state),
166 (void *)((unsigned long)current_pid()), 187 (void *)((unsigned long)current_pid()),
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
170 (void *)NULL, (void *)NULL); 191 (void *)NULL, (void *)NULL);
171} 192}
172#else 193#else
194
195#define xlog_trace_loggrant_alloc(log)
196#define xlog_trace_loggrant_dealloc(log)
173#define xlog_trace_loggrant(log,tic,string) 197#define xlog_trace_loggrant(log,tic,string)
198
199#define xlog_trace_iclog_alloc(iclog)
200#define xlog_trace_iclog_dealloc(iclog)
174#define xlog_trace_iclog(iclog,state) 201#define xlog_trace_iclog(iclog,state)
202
175#endif /* XFS_LOG_TRACE */ 203#endif /* XFS_LOG_TRACE */
176 204
177 205
@@ -1005,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp)
1005 l = iclog->ic_log; 1033 l = iclog->ic_log;
1006 1034
1007 /* 1035 /*
1008 * If the ordered flag has been removed by a lower 1036 * If the _XFS_BARRIER_FAILED flag was set by a lower
1009 * layer, it means the underlyin device no longer supports 1037 * layer, it means the underlying device no longer supports
1010 * barrier I/O. Warn loudly and turn off barriers. 1038 * barrier I/O. Warn loudly and turn off barriers.
1011 */ 1039 */
1012 if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { 1040 if (bp->b_flags & _XFS_BARRIER_FAILED) {
1041 bp->b_flags &= ~_XFS_BARRIER_FAILED;
1013 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; 1042 l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
1014 xfs_fs_cmn_err(CE_WARN, l->l_mp, 1043 xfs_fs_cmn_err(CE_WARN, l->l_mp,
1015 "xlog_iodone: Barriers are no longer supported" 1044 "xlog_iodone: Barriers are no longer supported"
@@ -1231,6 +1260,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1231 spin_lock_init(&log->l_grant_lock); 1260 spin_lock_init(&log->l_grant_lock);
1232 sv_init(&log->l_flush_wait, 0, "flush_wait"); 1261 sv_init(&log->l_flush_wait, 0, "flush_wait");
1233 1262
1263 xlog_trace_loggrant_alloc(log);
1234 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1264 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1235 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1265 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
1236 1266
@@ -1285,6 +1315,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1285 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); 1315 sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
1286 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); 1316 sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
1287 1317
1318 xlog_trace_iclog_alloc(iclog);
1319
1288 iclogp = &iclog->ic_next; 1320 iclogp = &iclog->ic_next;
1289 } 1321 }
1290 *iclogp = log->l_iclog; /* complete ring */ 1322 *iclogp = log->l_iclog; /* complete ring */
@@ -1565,11 +1597,7 @@ xlog_dealloc_log(xlog_t *log)
1565 sv_destroy(&iclog->ic_force_wait); 1597 sv_destroy(&iclog->ic_force_wait);
1566 sv_destroy(&iclog->ic_write_wait); 1598 sv_destroy(&iclog->ic_write_wait);
1567 xfs_buf_free(iclog->ic_bp); 1599 xfs_buf_free(iclog->ic_bp);
1568#ifdef XFS_LOG_TRACE 1600 xlog_trace_iclog_dealloc(iclog);
1569 if (iclog->ic_trace != NULL) {
1570 ktrace_free(iclog->ic_trace);
1571 }
1572#endif
1573 next_iclog = iclog->ic_next; 1601 next_iclog = iclog->ic_next;
1574 kmem_free(iclog); 1602 kmem_free(iclog);
1575 iclog = next_iclog; 1603 iclog = next_iclog;
@@ -1578,14 +1606,7 @@ xlog_dealloc_log(xlog_t *log)
1578 spinlock_destroy(&log->l_grant_lock); 1606 spinlock_destroy(&log->l_grant_lock);
1579 1607
1580 xfs_buf_free(log->l_xbuf); 1608 xfs_buf_free(log->l_xbuf);
1581#ifdef XFS_LOG_TRACE 1609 xlog_trace_loggrant_dealloc(log);
1582 if (log->l_trace != NULL) {
1583 ktrace_free(log->l_trace);
1584 }
1585 if (log->l_grant_trace != NULL) {
1586 ktrace_free(log->l_grant_trace);
1587 }
1588#endif
1589 log->l_mp->m_log = NULL; 1610 log->l_mp->m_log = NULL;
1590 kmem_free(log); 1611 kmem_free(log);
1591} /* xlog_dealloc_log */ 1612} /* xlog_dealloc_log */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c8a5b22ee3e3..e7d8f84443fa 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -448,7 +448,6 @@ typedef struct log {
448 int l_grant_write_bytes; 448 int l_grant_write_bytes;
449 449
450#ifdef XFS_LOG_TRACE 450#ifdef XFS_LOG_TRACE
451 struct ktrace *l_trace;
452 struct ktrace *l_grant_trace; 451 struct ktrace *l_grant_trace;
453#endif 452#endif
454 453
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index aa238c8fbd7a..8b6812f66a15 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1838,6 +1838,12 @@ again:
1838#endif 1838#endif
1839} 1839}
1840 1840
1841/*
1842 * xfs_lock_two_inodes() can only be used to lock one type of lock
1843 * at a time - the iolock or the ilock, but not both at once. If
1844 * we lock both at once, lockdep will report false positives saying
1845 * we have violated locking orders.
1846 */
1841void 1847void
1842xfs_lock_two_inodes( 1848xfs_lock_two_inodes(
1843 xfs_inode_t *ip0, 1849 xfs_inode_t *ip0,
@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes(
1848 int attempts = 0; 1854 int attempts = 0;
1849 xfs_log_item_t *lp; 1855 xfs_log_item_t *lp;
1850 1856
1857 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
1858 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
1851 ASSERT(ip0->i_ino != ip1->i_ino); 1859 ASSERT(ip0->i_ino != ip1->i_ino);
1852 1860
1853 if (ip0->i_ino > ip1->i_ino) { 1861 if (ip0->i_ino > ip1->i_ino) {
@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */
3152/* 3160/*
3153 * Zero file bytes between startoff and endoff inclusive. 3161 * Zero file bytes between startoff and endoff inclusive.
3154 * The iolock is held exclusive and no blocks are buffered. 3162 * The iolock is held exclusive and no blocks are buffered.
3163 *
3164 * This function is used by xfs_free_file_space() to zero
3165 * partial blocks when the range to free is not block aligned.
3166 * When unreserving space with boundaries that are not block
3167 * aligned we round up the start and round down the end
3168 * boundaries and then use this function to zero the parts of
3169 * the blocks that got dropped during the rounding.
3155 */ 3170 */
3156STATIC int 3171STATIC int
3157xfs_zero_remaining_bytes( 3172xfs_zero_remaining_bytes(
@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes(
3168 int nimap; 3183 int nimap;
3169 int error = 0; 3184 int error = 0;
3170 3185
3186 /*
3187 * Avoid doing I/O beyond eof - it's not necessary
3188 * since nothing can read beyond eof. The space will
3189 * be zeroed when the file is extended anyway.
3190 */
3191 if (startoff >= ip->i_size)
3192 return 0;
3193
3194 if (endoff > ip->i_size)
3195 endoff = ip->i_size;
3196
3171 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, 3197 bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
3172 XFS_IS_REALTIME_INODE(ip) ? 3198 XFS_IS_REALTIME_INODE(ip) ?
3173 mp->m_rtdev_targp : mp->m_ddev_targp); 3199 mp->m_rtdev_targp : mp->m_ddev_targp);